Skip to main content

asm_rs/
encoder.rs

1//! x86-64 instruction encoder.
2//!
3//! Encodes parsed `Instruction`s into machine-code bytes.
4//! Uses a table-driven approach for opcode lookup with manual
5//! ModR/M, SIB, REX prefix construction.
6
7// VEX/SSE encoding helpers inherently require many parameters (opcode bytes,
8// prefix, W-bit, register operands, etc.); suppressing this lint is the
9// pragmatic choice over wrapping parameters in a struct that adds no clarity.
10#![allow(clippy::too_many_arguments)]
11
12#[allow(unused_imports)]
13use alloc::string::String;
14#[allow(unused_imports)]
15use alloc::string::ToString;
16#[allow(unused_imports)]
17use alloc::vec;
18use alloc::vec::Vec;
19
20use crate::error::AsmError;
21#[allow(unused_imports)]
22use crate::error::Span;
23use crate::ir::*;
24
25// ─── InstrBytes: stack-allocated instruction buffer ────────────────────
26
/// Stack-allocated instruction byte buffer — eliminates per-instruction heap
/// allocation on the encoding hot path.
///
/// x86/x86-64 instructions are at most 15 bytes; AArch64 and ARM32 are
/// fixed at 4; RISC-V pseudo-instructions expand to at most 8 words (32
/// bytes for RV64 `li` with a full 64-bit immediate).  This inline buffer
/// covers **all** architectures without touching the heap.
///
/// Capacity: 32 bytes on the stack.
#[derive(Clone)]
pub struct InstrBytes {
    data: [u8; 32],
    len: u8,
}

impl InstrBytes {
    /// Create an empty buffer.
    #[inline]
    pub const fn new() -> Self {
        Self {
            data: [0; 32],
            len: 0,
        }
    }

    /// Create a buffer pre-filled from a byte slice (max 32 bytes).
    #[inline]
    pub fn from_slice(src: &[u8]) -> Self {
        let mut buf = Self::new();
        buf.extend_from_slice(src);
        buf
    }

    /// Append a single byte.
    ///
    /// # Panics
    ///
    /// Panics if the buffer is already full (32 bytes).
    #[inline]
    pub fn push(&mut self, byte: u8) {
        assert!(
            (self.len as usize) < 32,
            "InstrBytes overflow: cannot push beyond 32 bytes"
        );
        self.data[self.len as usize] = byte;
        self.len += 1;
    }

    /// Append a slice of bytes.
    ///
    /// # Panics
    ///
    /// Panics if appending would exceed the 32-byte capacity.
    #[inline]
    pub fn extend_from_slice(&mut self, bytes: &[u8]) {
        let start = self.len as usize;
        let end = start + bytes.len();
        assert!(
            end <= 32,
            "InstrBytes overflow: {} + {} exceeds 32-byte capacity",
            start,
            bytes.len()
        );
        self.data[start..end].copy_from_slice(bytes);
        self.len = end as u8;
    }

    /// Insert a byte at the given position, shifting subsequent bytes right.
    ///
    /// # Panics
    ///
    /// Panics if the buffer is full or `pos` is out of bounds.
    #[inline]
    pub fn insert(&mut self, pos: usize, byte: u8) {
        let len = self.len as usize;
        assert!(
            pos <= len && len < 32,
            "InstrBytes insert: pos={} len={} out of bounds",
            pos,
            len
        );
        // Shift bytes from pos..len right by 1
        let mut i = len;
        while i > pos {
            self.data[i] = self.data[i - 1];
            i -= 1;
        }
        self.data[pos] = byte;
        self.len += 1;
    }

    /// Remove a byte at the given position, shifting subsequent bytes left.
    ///
    /// # Panics
    ///
    /// Panics if `pos` is out of bounds.
    #[inline]
    pub fn remove(&mut self, pos: usize) {
        let len = self.len as usize;
        assert!(
            pos < len,
            "InstrBytes remove: pos={} out of bounds (len={})",
            pos,
            len
        );
        let mut i = pos;
        while i + 1 < len {
            self.data[i] = self.data[i + 1];
            i += 1;
        }
        // Clear the vacated slot so stale bytes never linger in `data`.
        self.data[len - 1] = 0;
        self.len -= 1;
    }

    /// Number of bytes in the buffer.
    #[inline]
    pub fn len(&self) -> usize {
        self.len as usize
    }

    /// Whether the buffer is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Convert to a heap-allocated `Vec<u8>`.
    #[inline]
    pub fn to_vec(&self) -> Vec<u8> {
        self.as_ref().to_vec()
    }
}

impl Default for InstrBytes {
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}

impl core::ops::Deref for InstrBytes {
    type Target = [u8];
    #[inline]
    fn deref(&self) -> &[u8] {
        &self.data[..self.len as usize]
    }
}

impl core::ops::DerefMut for InstrBytes {
    #[inline]
    fn deref_mut(&mut self) -> &mut [u8] {
        &mut self.data[..self.len as usize]
    }
}

impl AsRef<[u8]> for InstrBytes {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self
    }
}

impl AsMut<[u8]> for InstrBytes {
    #[inline]
    fn as_mut(&mut self) -> &mut [u8] {
        self
    }
}

impl core::fmt::Debug for InstrBytes {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_list().entries(self.iter()).finish()
    }
}

impl PartialEq for InstrBytes {
    fn eq(&self, other: &Self) -> bool {
        **self == **other
    }
}

impl Eq for InstrBytes {}

impl PartialEq<[u8]> for InstrBytes {
    fn eq(&self, other: &[u8]) -> bool {
        **self == *other
    }
}

impl PartialEq<Vec<u8>> for InstrBytes {
    fn eq(&self, other: &Vec<u8>) -> bool {
        **self == **other
    }
}
221
222// ─── EncodedInstr ──────────────────────────────────────────
223
/// Result of encoding a single instruction.
///
/// Produced by [`encode_instruction`]; the linker consumes `relocation`
/// and `relax` when resolving labels and laying out the final image.
#[derive(Debug, Clone)]
pub struct EncodedInstr {
    /// The machine code bytes (long form for relaxable instructions).
    pub bytes: InstrBytes,
    /// If the instruction references a label, this records it for the linker.
    pub relocation: Option<Relocation>,
    /// If present, the instruction can be shortened via branch relaxation.
    pub relax: Option<RelaxInfo>,
}
234
/// How the linker should patch the relocation target into the instruction.
///
/// Only the x86 kinds are unconditionally available; the ARM/AArch64/RISC-V
/// kinds exist only when the matching architecture feature is enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum RelocKind {
    /// x86 RIP-relative: raw `i8`/`i32` displacement written at `offset`.
    /// `trailing_bytes` accounts for any immediate after the disp field.
    X86Relative,
    /// Absolute address written as raw LE bytes (1/2/4/8).
    Absolute,
    /// ARM32 B/BL: PC-relative offset >> 2 in bits 23:0 of the 32-bit word.
    #[cfg(feature = "arm")]
    ArmBranch24,
    /// ARM32 LDR literal (PC-relative): 12-bit offset in bits 11:0, U-bit in bit 23.
    #[cfg(feature = "arm")]
    ArmLdrLit,
    /// ARM32 ADR (data-processing immediate): 8-bit value with 4-bit rotation in bits 11:0,
    /// ADD/SUB opcode in bits 24:21, U-sense via opcode (ADD=0x4, SUB=0x2).
    #[cfg(feature = "arm")]
    ArmAdr,
    /// Thumb-2 conditional branch (16-bit): 8-bit signed offset >> 1 in bits 7:0.
    /// Range: ±256 bytes from PC+4.
    #[cfg(feature = "arm")]
    ThumbBranch8,
    /// Thumb unconditional branch (16-bit): 11-bit signed offset >> 1 in bits 10:0.
    /// Range: ±2 KB from PC+4.
    #[cfg(feature = "arm")]
    ThumbBranch11,
    /// Thumb-2 BL (32-bit): 25-bit offset across two halfwords. Range: ±16 MB.
    #[cfg(feature = "arm")]
    ThumbBl,
    /// Thumb-2 B.W (32-bit wide unconditional branch): 24-bit offset. Range: ±16 MB.
    #[cfg(feature = "arm")]
    ThumbBranchW,
    /// Thumb-2 B.cond.W (32-bit wide conditional branch): 20-bit offset. Range: ±1 MB.
    #[cfg(feature = "arm")]
    ThumbCondBranchW,
    /// Thumb LDR Rt, \[PC, #imm8×4\]: 8-bit word-aligned PC-relative literal load.
    /// Range: 0–1020 bytes forward only. PC = (instr_addr + 4) & ~3.
    #[cfg(feature = "arm")]
    ThumbLdrLit8,
    /// AArch64 B/BL: PC-relative offset >> 2 in bits 25:0 of the 32-bit word.
    #[cfg(feature = "aarch64")]
    Aarch64Jump26,
    /// AArch64 B.cond / CBZ / CBNZ: PC-relative offset >> 2 in bits 23:5.
    #[cfg(feature = "aarch64")]
    Aarch64Branch19,
    /// AArch64 TBZ / TBNZ: PC-relative offset >> 2 in bits 18:5 (14-bit imm).
    #[cfg(feature = "aarch64")]
    Aarch64Branch14,
    /// AArch64 LDR (literal): PC-relative offset >> 2 in bits 23:5.
    #[cfg(feature = "aarch64")]
    Aarch64LdrLit19,
    /// AArch64 ADR: PC-relative offset with immhi (bits 23:5) and immlo (bits 30:29).
    #[cfg(feature = "aarch64")]
    Aarch64Adr21,
    /// AArch64 ADRP: page-relative offset with immhi/immlo, target &= ~0xFFF.
    #[cfg(feature = "aarch64")]
    Aarch64Adrp,
    /// AArch64 ADRP+ADD pair (8 bytes): ADR relaxation long form.
    /// The first word is ADRP (patched with page offset), the second word
    /// is ADD (patched with lo12 = target & 0xFFF).
    #[cfg(feature = "aarch64")]
    Aarch64AdrpAddPair,
    /// RISC-V JAL: PC-relative offset in J-type immediate (bits 31:12), ±1MB range.
    #[cfg(feature = "riscv")]
    RvJal20,
    /// RISC-V B-type branch: PC-relative offset in B-type immediate (bits 31:7), ±4KB range.
    #[cfg(feature = "riscv")]
    RvBranch12,
    /// RISC-V AUIPC: upper 20 bits of PC-relative offset (bits 31:12).
    #[cfg(feature = "riscv")]
    RvAuipc20,
    /// RISC-V C-extension CB-type branch: 9-bit signed PC-relative offset (±256 B).
    #[cfg(feature = "riscv")]
    RvCBranch8,
    /// RISC-V C-extension CJ-type jump: 12-bit signed PC-relative offset (±2 KB).
    #[cfg(feature = "riscv")]
    RvCJump11,
}
314
/// A relocation record for unresolved labels.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Relocation {
    /// Offset within the instruction bytes where the relocation target is.
    pub offset: usize,
    /// Size of the relocation field in bytes (1, 2, or 4; 8 is also
    /// possible for [`RelocKind::Absolute`] 64-bit targets).
    pub size: u8,
    /// The label name to resolve.  Stored as `Rc<str>` so that cloning during
    /// relocation propagation and linker resolution is a cheap refcount bump
    /// instead of a heap allocation.
    pub label: alloc::rc::Rc<str>,
    /// How the linker patches the target address into the instruction bytes.
    pub kind: RelocKind,
    /// The addend for the relocation (constant offset added to the resolved
    /// label address).
    pub addend: i64,
    /// Number of instruction bytes that follow the relocation field.
    /// For x86 RIP-relative relocations, the CPU computes EA = RIP + disp where
    /// RIP = address of the byte AFTER the entire instruction.  When a trailing
    /// immediate follows the displacement, we need this to calculate the correct
    /// RIP at link time: `rip = reloc_addr + size + trailing_bytes`.
    pub trailing_bytes: u8,
}
338
/// Information for branch relaxation — allows the linker to try a shorter encoding.
///
/// When present on an [`EncodedInstr`], the linker starts with this short form
/// and only promotes to the long form (in `bytes`) when the target is out of
/// ±127 byte range.  This implements Szymanski-style monotonic growth.
///
/// (The ±127 range is the x86 rel8 case; for other architectures the short
/// form's range is implied by `short_relocation`'s [`RelocKind`].)
#[derive(Debug, Clone)]
pub struct RelaxInfo {
    /// Complete short-form instruction bytes (opcode + placeholder rel8).
    pub short_bytes: InstrBytes,
    /// Offset of the rel8 displacement byte within `short_bytes`.
    pub short_reloc_offset: usize,
    /// Optional relocation for the short form.  When `Some`, the linker
    /// applies this relocation to `short_bytes` instead of raw byte-patching.
    /// Used for architectures like RISC-V where even the short form needs
    /// complex bitfield manipulation.
    pub short_relocation: Option<Relocation>,
}
356
/// Extract label name and addend from a label or expression operand.
///
/// Returns `Some((label, addend))` for:
/// - `Operand::Label("foo")` → `("foo", 0)`
/// - `Operand::Expression(label + N)` → `("label", N)`
///
/// Returns `None` for non-label operands or expressions with multiple labels.
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64", feature = "riscv"))]
pub(crate) fn extract_label(op: &Operand) -> Option<(&str, i64)> {
    match op {
        Operand::Label(name) => Some((name.as_str(), 0)),
        Operand::Expression(expr) => expr.label_addend(),
        _ => None,
    }
}
374
/// Encode one instruction into machine code bytes, dispatching on `arch`
/// to the architecture-specific encoder.
///
/// # Errors
///
/// Returns `Err(AsmError)` if the instruction mnemonic is unknown, the
/// operand combination is invalid, or the target architecture is not
/// supported.
#[inline]
pub fn encode_instruction(instr: &Instruction, arch: Arch) -> Result<EncodedInstr, AsmError> {
    // With every arch feature enabled all variants are matched, which would
    // make the `_` fallback unreachable — silence that lint.
    #[allow(unreachable_patterns)]
    match arch {
        #[cfg(feature = "x86_64")]
        Arch::X86_64 => encode_x86_64(instr),
        #[cfg(feature = "x86")]
        Arch::X86 => encode_x86_32(instr),
        #[cfg(feature = "arm")]
        Arch::Arm | Arch::Thumb => crate::arm::encode_arm(instr, arch),
        #[cfg(feature = "aarch64")]
        Arch::Aarch64 => crate::aarch64::encode_aarch64(instr),
        #[cfg(feature = "riscv")]
        Arch::Rv32 | Arch::Rv64 => crate::riscv::encode_riscv(instr, arch),
        _ => Err(AsmError::Syntax {
            msg: alloc::format!(
                "encoder not implemented for {} (enable the feature flag)",
                arch
            ),
            span: instr.span,
        }),
    }
}
403
404// ─── Shared x86/x86-64 helpers ───────────────────────────────
405
/// Emit legacy prefixes (LOCK/REP/REPNE/segment) and memory-operand
/// segment overrides into `buf`.  Returns the byte length of all emitted
/// prefix bytes (needed later for displacement scanning).
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_x86_prefixes(buf: &mut InstrBytes, instr: &Instruction, ops: &[Operand]) -> usize {
    // Explicit prefixes from the instruction itself, in declaration order.
    for pfx in &instr.prefixes {
        let byte = match pfx {
            Prefix::Lock => 0xF0,
            Prefix::Rep => 0xF3,
            Prefix::Repne => 0xF2,
            Prefix::SegFs => 0x64,
            Prefix::SegGs => 0x65,
        };
        buf.push(byte);
    }

    // Segment overrides carried on memory operands (must come before
    // REX/opcode).  Skipped entirely when a segment prefix was already
    // emitted via `instr.prefixes`, to avoid double emission.
    let explicit_seg = instr
        .prefixes
        .iter()
        .any(|p| matches!(p, Prefix::SegFs | Prefix::SegGs));
    if !explicit_seg {
        let segments = ops.iter().filter_map(|op| match op {
            Operand::Memory(mem) => mem.segment,
            _ => None,
        });
        for seg in segments {
            let override_byte = match seg {
                Register::Cs => Some(0x2E_u8),
                Register::Ds => Some(0x3E),
                Register::Es => Some(0x26),
                Register::Fs => Some(0x64),
                Register::Gs => Some(0x65),
                Register::Ss => Some(0x36),
                _ => None,
            };
            if let Some(b) = override_byte {
                buf.push(b);
            }
        }
    }

    buf.len()
}
450
/// Check whether any memory operand uses 32-bit base/index registers,
/// requiring the address-size override prefix (0x67) in 64-bit mode.
///
/// In 64-bit mode the default address size is 64 bits. Using `[eax]` or
/// `[ecx+edx*4]` requires the `0x67` prefix to select 32-bit addressing.
/// This is emitted only in `encode_x86_64`, not in `encode_x86_32` where
/// 32-bit addressing is already the default.
#[cfg(feature = "x86_64")]
fn needs_addr_size_override(ops: &[Operand]) -> bool {
    ops.iter().any(|op| {
        if let Operand::Memory(mem) = op {
            // RIP-relative addressing is always 64-bit — never an override.
            let base_is_32 = mem
                .base
                .map_or(false, |b| b != Register::Rip && b.size_bits() == 32);
            let index_is_32 = mem.index.map_or(false, |i| i.size_bits() == 32);
            base_is_32 || index_is_32
        } else {
            false
        }
    })
}
480
/// Validate that a LOCK prefix is only used with a memory destination operand.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn validate_lock_prefix(instr: &Instruction, ops: &[Operand]) -> Result<(), AsmError> {
    // Nothing to check unless the instruction actually carries LOCK.
    if !instr.prefixes.contains(&Prefix::Lock) {
        return Ok(());
    }
    // The destination (first operand) must be a memory operand.
    match ops.first() {
        Some(Operand::Memory(_)) => Ok(()),
        _ => Err(AsmError::InvalidOperands {
            detail: String::from("LOCK prefix requires a memory destination operand"),
            span: instr.span,
        }),
    }
}
495
/// If a memory operand carries a `disp_label` and the encoder didn't set a
/// relocation explicitly, scan the encoded bytes for the displacement field
/// and create the relocation.  Also computes `trailing_bytes` for
/// RIP-relative relocations.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn propagate_disp_label_reloc(
    buf: &[u8],
    ops: &[Operand],
    prefix_len: usize,
    reloc: &mut Option<Relocation>,
) {
    // A relocation set by the mnemonic dispatcher takes precedence; this
    // pass only fills in relocations implied by `disp_label` operands.
    if reloc.is_some() {
        return;
    }

    for op in ops {
        if let Operand::Memory(mem) = op {
            if let Some(ref label) = mem.disp_label {
                // Structurally locate the displacement offset by parsing
                // the instruction's ModR/M + SIB layout, rather than
                // byte-pattern scanning which is fragile when displacement
                // bytes accidentally match opcode/prefix bytes.
                if let Some(off) = find_disp_offset_structural(buf, prefix_len) {
                    let kind = if mem.base == Some(Register::Rip) {
                        RelocKind::X86Relative
                    } else {
                        RelocKind::Absolute
                    };
                    *reloc = Some(Relocation {
                        offset: off,
                        size: 4,
                        label: alloc::rc::Rc::from(&**label),
                        kind,
                        // Any numeric displacement becomes the addend on
                        // top of the resolved label address.
                        addend: mem.disp,
                        trailing_bytes: 0,
                    });
                }
                break; // At most one relocation per instruction
            }
        }
    }

    // Compute trailing_bytes for RIP-relative relocations.  Thanks to the
    // early return above, this only ever touches a relocation created here.
    if let Some(ref mut r) = reloc {
        if r.kind == RelocKind::X86Relative {
            let end_of_reloc = r.offset + r.size as usize;
            r.trailing_bytes = (buf.len() - end_of_reloc) as u8;
        }
    }
}
546
/// Returns `true` for x86 legacy prefix bytes that an individual encoder may
/// emit after the top-level `emit_x86_prefixes()` call: operand-size override
/// (0x66), address-size override (0x67), and SSE mandatory prefixes (0xF2/0xF3).
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn is_post_prefix_legacy_byte(b: u8) -> bool {
    b == 0x66 || b == 0x67 || b == 0xF2 || b == 0xF3
}
555
/// Structurally locate the displacement field offset within an encoded x86
/// instruction by parsing the prefix → opcode → ModR/M → SIB chain.
///
/// This replaces the previous byte-pattern scanning approach (which searched
/// for `(disp as i32).to_le_bytes()` in the buffer) and is immune to false
/// matches when displacement bytes happen to equal opcode or prefix bytes.
///
/// `prefix_len` is the count of bytes already emitted by
/// `emit_x86_prefixes()`; decoding starts just past them.
///
/// Returns `Some(offset)` — the byte index in `buf` where the displacement
/// field starts — or `None` if the instruction has no memory displacement.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn find_disp_offset_structural(buf: &[u8], prefix_len: usize) -> Option<usize> {
    if buf.len() <= prefix_len {
        return None;
    }

    let mut pos = prefix_len;

    // Skip legacy prefixes emitted by individual encoders (0x66 operand-size,
    // 0x67 address-size, 0xF2/0xF3 mandatory SSE prefixes).  These always
    // precede the REX/VEX/EVEX prefix and opcode.
    while pos < buf.len() && is_post_prefix_legacy_byte(buf[pos]) {
        pos += 1;
    }

    if pos >= buf.len() {
        return None;
    }

    // ── VEX / EVEX ──────────────────────────────────────────────────
    // VEX 2-byte: [C5] [RvvvvLpp]            → 1 opcode byte follows
    // VEX 3-byte: [C4] [RXBmmmmm] [WvvvvLpp] → 1 opcode byte follows
    // EVEX:       [62] [P0] [P1] [P2]         → 1 opcode byte follows
    //
    // The opcode map (0F / 0F38 / 0F3A) is encoded *inside* the VEX/EVEX
    // prefix, so exactly one opcode byte appears before ModR/M.
    let modrm_pos = if buf[pos] == 0xC5 {
        pos + 3 // C5 + 1 prefix byte + 1 opcode
    } else if buf[pos] == 0xC4 {
        pos + 4 // C4 + 2 prefix bytes + 1 opcode
    } else if buf[pos] == 0x62 {
        pos + 5 // 62 + 3 prefix bytes + 1 opcode
    } else {
        // ── Legacy encoding ─────────────────────────────────────────
        // Skip REX prefix (0x40..0x4F).  In 32-bit mode these bytes are
        // the INC/DEC short forms which only encode register operands, so
        // they never appear when a memory displacement needs locating.
        if (buf[pos] & 0xF0) == 0x40 {
            pos += 1;
        }
        if pos >= buf.len() {
            return None;
        }

        // Parse opcode escape:
        //   0x0F 0x38 xx → 3-byte opcode (ModR/M follows xx)
        //   0x0F 0x3A xx → 3-byte opcode (ModR/M follows xx)
        //   0x0F xx      → 2-byte opcode (ModR/M follows xx)
        //   xx           → 1-byte opcode (ModR/M follows xx)
        if buf[pos] == 0x0F {
            pos += 1;
            if pos >= buf.len() {
                return None;
            }
            if buf[pos] == 0x38 || buf[pos] == 0x3A {
                pos += 2; // escape extension byte + opcode byte
            } else {
                pos += 1; // opcode byte
            }
        } else {
            pos += 1; // single-byte opcode
        }

        pos
    };

    // ── Parse ModR/M ────────────────────────────────────────────────
    if modrm_pos >= buf.len() {
        return None;
    }

    let modrm = buf[modrm_pos];
    let mod_bits = (modrm >> 6) & 0x03;
    let rm = modrm & 0x07;

    // mod=11 → register-direct, no memory displacement
    if mod_bits == 0x03 {
        return None;
    }

    let mut disp_pos = modrm_pos + 1;

    // r/m=100 → SIB byte follows ModR/M
    if rm == 0x04 {
        if disp_pos >= buf.len() {
            return None;
        }
        let sib_base = buf[disp_pos] & 0x07;
        disp_pos += 1; // skip SIB

        match mod_bits {
            0b00 if sib_base == 0x05 => Some(disp_pos), // [index*scale + disp32]
            0b00 => None,                               // [base + index*scale]
            0b01 | 0b10 => Some(disp_pos),              // [base + index*scale + disp]
            _ => None, // mod=11 rejected above; arm kept for exhaustiveness
        }
    } else {
        match mod_bits {
            0b00 if rm == 0x05 => Some(disp_pos), // [RIP + disp32] or [disp32]
            0b00 => None,                         // [base], no displacement
            0b01 | 0b10 => Some(disp_pos),        // [base + disp8/32]
            _ => None, // mod=11 rejected above; arm kept for exhaustiveness
        }
    }
}
670
/// Fix up `trailing_bytes` for any RIP-relative relocation.  Must be called
/// AFTER the instruction buffer is final — works for both dispatch-created
/// and disp_label-propagated relocations.
#[cfg(feature = "x86_64")]
fn fixup_rip_trailing_bytes(buf: &[u8], reloc: &mut Option<Relocation>) {
    if let Some(r) = reloc.as_mut() {
        if r.kind == RelocKind::X86Relative {
            // Bytes between the end of the disp field and the end of the
            // whole instruction (e.g. a trailing immediate).
            let disp_end = r.offset + r.size as usize;
            r.trailing_bytes = (buf.len() - disp_end) as u8;
        }
    }
}
683
/// Encode a single instruction for x86-64 long mode.
///
/// Pipeline: legacy/segment prefixes → optional 0x67 address-size
/// override → mnemonic dispatch → LOCK validation → relocation fixups.
///
/// # Errors
///
/// Returns `Err(AsmError::UnknownMnemonic)` for mnemonics missing from the
/// dispatch table, or any error from the mnemonic-specific encoder or
/// LOCK-prefix validation.
#[cfg(feature = "x86_64")]
fn encode_x86_64(instr: &Instruction) -> Result<EncodedInstr, AsmError> {
    let mut buf = InstrBytes::new();
    let mut reloc: Option<Relocation> = None;
    let mut relax_info: Option<RelaxInfo> = None;

    let ops = &instr.operands;
    let prefix_len = emit_x86_prefixes(&mut buf, instr, ops);

    // Address-size override: emit 0x67 when memory operands use 32-bit
    // base/index registers in 64-bit mode (e.g. `mov eax, [ecx]`).
    if needs_addr_size_override(ops) {
        buf.push(0x67);
    }

    let mnemonic = instr.mnemonic.as_str();

    match crate::x86::dispatch_x86_64(mnemonic, &mut buf, ops, instr, &mut reloc, &mut relax_info) {
        Some(Ok(())) => {}
        Some(Err(e)) => return Err(e),
        None => {
            return Err(AsmError::UnknownMnemonic {
                mnemonic: String::from(mnemonic),
                arch: crate::error::ArchName::X86_64,
                span: instr.span,
            });
        }
    }

    validate_lock_prefix(instr, ops)?;
    propagate_disp_label_reloc(&buf, ops, prefix_len, &mut reloc);

    // Compute trailing_bytes for RIP-relative relocations: the number of
    // instruction bytes that follow the relocation field.  The CPU computes
    // EA = RIP + disp32, where RIP = address past the ENTIRE instruction.
    fixup_rip_trailing_bytes(&buf, &mut reloc);

    Ok(EncodedInstr {
        bytes: buf,
        relocation: reloc,
        relax: relax_info,
    })
}
727
728// ─── x86-32 encoder ──────────────────────────────────────────
729
/// Validate that an instruction's operands are legal for 32-bit protected mode.
#[cfg(feature = "x86")]
fn validate_x86_32(instr: &Instruction) -> Result<(), AsmError> {
    // All rejections share the same error shape and span; build them in one place.
    let reject = |detail: &str| {
        Err(AsmError::InvalidOperands {
            detail: String::from(detail),
            span: instr.span,
        })
    };

    for op in &instr.operands {
        match op {
            Operand::Register(reg) => {
                if reg.size_bits() == 64 {
                    return reject("64-bit registers are not available in 32-bit mode");
                }
                if reg.is_extended() {
                    return reject(
                        "extended registers (R8-R15) are not available in 32-bit mode",
                    );
                }
                if reg.requires_rex_for_byte() {
                    return reject(
                        "SPL/BPL/SIL/DIL are not available in 32-bit mode (require REX)",
                    );
                }
                if matches!(reg, Register::Rip) {
                    return reject("RIP-relative addressing is not available in 32-bit mode");
                }
            }
            Operand::Memory(mem) => {
                if mem.base == Some(Register::Rip) {
                    return reject("RIP-relative addressing is not available in 32-bit mode");
                }
                if let Some(base) = mem.base {
                    if base.size_bits() == 64 || base.is_extended() {
                        return reject(
                            "64-bit/extended registers cannot be used as memory base in 32-bit mode",
                        );
                    }
                }
                if let Some(idx) = mem.index {
                    if idx.size_bits() == 64 || idx.is_extended() {
                        return reject(
                            "64-bit/extended registers cannot be used as memory index in 32-bit mode",
                        );
                    }
                }
            }
            _ => {}
        }
    }

    // `movsxd` only exists in 64-bit mode; report it as unknown here.
    if instr.mnemonic == "movsxd" {
        return Err(AsmError::UnknownMnemonic {
            mnemonic: String::from("movsxd"),
            arch: crate::error::ArchName::X86,
            span: instr.span,
        });
    }

    Ok(())
}
811
/// x86-32 (protected mode) encoder.
///
/// Shares the instruction encoding logic with x86-64 but:
/// - Rejects 64-bit registers, extended registers (R8-R15), RIP-relative
/// - push/pop accept 32-bit registers (default operand size)
/// - All other instructions produce identical byte sequences
#[cfg(feature = "x86")]
fn encode_x86_32(instr: &Instruction) -> Result<EncodedInstr, AsmError> {
    validate_x86_32(instr)?;

    let mut bytes = InstrBytes::new();
    let mut relocation: Option<Relocation> = None;
    let mut relax: Option<RelaxInfo> = None;

    let operands = &instr.operands;
    let prefix_len = emit_x86_prefixes(&mut bytes, instr, operands);
    let mnemonic = instr.mnemonic.as_str();

    // push/pop have dedicated 32-bit encoders (segment registers, default
    // 32-bit operand size) — encode and return without the generic dispatch.
    if mnemonic == "push" {
        encode_push_32(&mut bytes, operands, instr, &mut relocation)?;
        return Ok(EncodedInstr { bytes, relocation, relax });
    }
    if mnemonic == "pop" {
        encode_pop_32(&mut bytes, operands, instr)?;
        return Ok(EncodedInstr { bytes, relocation, relax });
    }

    // In 32-bit mode, INC r16/r32 (0x40+rd) and DEC r16/r32 (0x48+rd) are
    // single-byte short forms.  These opcodes are repurposed as REX
    // prefixes in 64-bit mode, so this path is x86-32 only.  8-bit
    // registers and memory operands fall through to the generic encoder,
    // which uses the 0xFE/0xFF ModR/M form.
    if (mnemonic == "inc" || mnemonic == "dec") && operands.len() == 1 {
        if let Operand::Register(reg) = &operands[0] {
            let size = reg_size(*reg);
            if size == 16 || size == 32 {
                if size == 16 {
                    bytes.push(0x66);
                }
                let opcode = if mnemonic == "inc" { 0x40 } else { 0x48 };
                bytes.push(opcode + reg.base_code());
                return Ok(EncodedInstr { bytes, relocation, relax });
            }
        }
    }

    // Everything else: reuse the x86-64 dispatch.  Validation already
    // rejected 64-bit/extended registers, so the shared encoders will not
    // emit REX prefixes — the resulting bytes are valid 32-bit code.
    match crate::x86::dispatch_x86_64(
        mnemonic,
        &mut bytes,
        operands,
        instr,
        &mut relocation,
        &mut relax,
    ) {
        Some(Ok(())) => {}
        Some(Err(e)) => return Err(e),
        None => {
            return Err(AsmError::UnknownMnemonic {
                mnemonic: String::from(mnemonic),
                arch: crate::error::ArchName::X86,
                span: instr.span,
            });
        }
    }

    validate_lock_prefix(instr, operands)?;
    propagate_disp_label_reloc(&bytes, operands, prefix_len, &mut relocation);

    Ok(EncodedInstr { bytes, relocation, relax })
}
901
902// ─── x86-16 encoder (real mode) ─────────────────────────────
903
/// x86-16 (real mode) encoder.
///
/// In 16-bit mode the default operand size is 16 bits and the default
/// address size is 16 bits.  The `0x66` prefix switches operand size to
/// 32 bits, and `0x67` switches address size to 32 bits — the reverse
/// of 32-bit protected mode.
///
/// Implementation: reuse the 32-bit encoder (which handles push/pop/inc/dec
/// short forms, segment registers, etc.) and then toggle the `0x66` prefix:
///
///   - If `0x66` is present → remove it (16-bit is now the default)
///   - If `0x66` is absent AND the instruction uses 32-bit GPRs → add it
#[cfg(feature = "x86")]
pub fn encode_instruction_16(instr: &Instruction) -> Result<EncodedInstr, AsmError> {
    // Encode with the 32-bit backend first — push/pop/inc/dec short forms,
    // segment registers and LOCK-prefix validation all come for free.
    let mut encoded = encode_x86_32(instr)?;

    // Then flip the 0x66 operand-size prefix semantics for real mode: the
    // 32-bit encoder emits 0x66 for 16-bit operands, but in 16-bit mode
    // 16-bit is the default (drop the prefix) and 32-bit operands are the
    // ones that need it (add the prefix).
    toggle_operand_size_prefix_16(&mut encoded.bytes, &instr.operands, &mut encoded.relocation);

    Ok(encoded)
}
930
/// Toggle the 0x66 operand-size prefix for 16-bit mode encoding.
///
/// In 16-bit mode: remove 0x66 if present (16-bit is now default),
/// or add 0x66 if absent and instruction uses 32-bit registers.
/// Any relocation offset at or past the edit point is shifted to
/// account for the removed/inserted prefix byte.
#[cfg(feature = "x86")]
fn toggle_operand_size_prefix_16(
    buf: &mut InstrBytes,
    ops: &[Operand],
    reloc: &mut Option<Relocation>,
) {
    // Scan the prefix region (everything before the first non-prefix byte)
    // for an existing 0x66.  `is_legacy_prefix` already includes 0x66 and
    // 0x67, so no separate address-size special case is needed here
    // (the original `b != 0x67` test was redundant).
    let mut found_66_at = None;
    for i in 0..buf.len() {
        let b = buf[i];
        if b == 0x66 {
            found_66_at = Some(i);
            break;
        }
        if !is_legacy_prefix(b) {
            // Reached the first opcode byte — no operand-size prefix present.
            break;
        }
    }

    if let Some(pos) = found_66_at {
        // Remove 0x66 — in 16-bit mode, 16-bit operands don't need it
        buf.remove(pos);
        if let Some(ref mut r) = reloc {
            if r.offset > pos {
                r.offset -= 1;
            }
        }
        return;
    }

    // No 0x66 present.  Add one if the instruction operates on 32-bit
    // data: either a 32-bit GP register operand (EIP excluded — not a
    // data operand) or an explicit DWORD memory size annotation
    // (e.g. mov dword [bx], 1).
    let has_32bit_gpr = ops.iter().any(|op| {
        matches!(op, Operand::Register(r) if r.size_bits() == 32 && !matches!(r, Register::Eip))
    });
    let has_dword_mem = ops.iter().any(
        |op| matches!(op, Operand::Memory(m) if m.size == Some(crate::ir::OperandSize::Dword)),
    );
    if !(has_32bit_gpr || has_dword_mem) {
        return;
    }

    // Insert 0x66 after any existing prefixes so prefix ordering stays valid.
    let mut insert_pos = 0;
    for i in 0..buf.len() {
        if is_legacy_prefix(buf[i]) {
            insert_pos = i + 1;
        } else {
            break;
        }
    }
    buf.insert(insert_pos, 0x66);
    if let Some(ref mut r) = reloc {
        if r.offset >= insert_pos {
            r.offset += 1;
        }
    }
}
999
/// Check if a byte is a legacy x86 prefix.
///
/// Covers LOCK/REPNE/REP, the six segment overrides, and the
/// operand-size/address-size overrides.
#[cfg(feature = "x86")]
#[inline]
fn is_legacy_prefix(b: u8) -> bool {
    match b {
        // LOCK, REPNE/REPNZ, REP/REPE/REPZ
        0xF0 | 0xF2 | 0xF3 => true,
        // Segment overrides: ES, CS, SS, DS, FS, GS
        0x26 | 0x2E | 0x36 | 0x3E | 0x64 | 0x65 => true,
        // Operand-size / address-size overrides
        0x66 | 0x67 => true,
        _ => false,
    }
}
/// x86-32 push: GP registers (16/32-bit), all six segment registers,
/// immediates (imm8/imm32), memory operands, and label expressions.
#[cfg(feature = "x86")]
fn encode_push_32(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    if ops.len() != 1 {
        return Err(invalid_operands("push", "expected 1 operand", instr.span));
    }
    match &ops[0] {
        Operand::Register(reg) => {
            // Segment register push — all 6 valid in 32-bit mode.
            // ES/CS/SS/DS are single-byte opcodes; FS/GS use the 0x0F escape.
            let seg_bytes: Option<&[u8]> = match reg {
                Register::Es => Some(&[0x06]),
                Register::Cs => Some(&[0x0E]),
                Register::Ss => Some(&[0x16]),
                Register::Ds => Some(&[0x1E]),
                Register::Fs => Some(&[0x0F, 0xA0]),
                Register::Gs => Some(&[0x0F, 0xA8]),
                _ => None,
            };
            if let Some(enc) = seg_bytes {
                buf.extend_from_slice(enc);
                return Ok(());
            }
            let size = reg.size_bits();
            if size == 8 {
                return Err(invalid_operands(
                    "push",
                    "push does not accept 8-bit registers",
                    instr.span,
                ));
            }
            if size == 16 {
                buf.push(0x66); // operand size override for 16-bit
            }
            // push r32/r16 — 0x50+rd short form
            buf.push(0x50 + reg.base_code());
        }
        Operand::Immediate(imm) => {
            let value = *imm;
            if (i8::MIN as i128..=i8::MAX as i128).contains(&value) {
                // push imm8 (sign-extended by the CPU)
                buf.push(0x6A);
                buf.push(value as i8 as u8);
            } else if (i32::MIN as i128..=u32::MAX as i128).contains(&value) {
                // push imm32
                buf.push(0x68);
                buf.extend_from_slice(&(value as i32).to_le_bytes());
            } else {
                return Err(invalid_operands(
                    "push",
                    "immediate value out of range for push (must fit in 32 bits)",
                    instr.span,
                ));
            }
        }
        Operand::Memory(mem) => {
            // push r/m — FF /6; 0x66 override for explicit WORD size.
            if mem.size.map(|s| s.bits()).unwrap_or(32) == 16 {
                buf.push(0x66);
            }
            buf.push(0xFF);
            emit_mem_modrm(buf, 6, mem);
        }
        op @ (Operand::Label(_) | Operand::Expression(_)) => {
            // push imm32 with an absolute relocation against the label.
            let Some((label, addend)) = extract_label(op) else {
                return Err(invalid_operands("push", "unsupported operand", instr.span));
            };
            buf.push(0x68);
            let reloc_off = buf.len();
            buf.extend_from_slice(&0i32.to_le_bytes());
            *reloc = Some(Relocation {
                offset: reloc_off,
                size: 4,
                label: alloc::rc::Rc::from(label),
                kind: RelocKind::Absolute,
                addend,
                trailing_bytes: 0,
            });
        }
        _ => return Err(invalid_operands("push", "unsupported operand", instr.span)),
    }
    Ok(())
}
1107
/// x86-32 pop: accepts 32-bit and 16-bit GP registers + FS/GS/DS/ES/SS.
#[cfg(feature = "x86")]
fn encode_pop_32(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    if ops.len() != 1 {
        return Err(invalid_operands("pop", "expected 1 operand", instr.span));
    }
    match &ops[0] {
        Operand::Register(reg) => {
            // Segment registers: CS is not poppable; FS/GS use the 0x0F escape.
            let seg_bytes: Option<&[u8]> = match reg {
                Register::Es => Some(&[0x07]),
                Register::Ss => Some(&[0x17]),
                Register::Ds => Some(&[0x1F]),
                Register::Fs => Some(&[0x0F, 0xA1]),
                Register::Gs => Some(&[0x0F, 0xA9]),
                Register::Cs => {
                    return Err(invalid_operands("pop", "cannot pop into CS", instr.span));
                }
                _ => None,
            };
            if let Some(enc) = seg_bytes {
                buf.extend_from_slice(enc);
                return Ok(());
            }
            let size = reg.size_bits();
            if size == 8 {
                return Err(invalid_operands(
                    "pop",
                    "pop does not accept 8-bit registers",
                    instr.span,
                ));
            }
            if size == 16 {
                buf.push(0x66);
            }
            // pop r32/r16 — 0x58+rd short form
            buf.push(0x58 + reg.base_code());
        }
        Operand::Memory(mem) => {
            // pop r/m — 8F /0; 0x66 override for explicit WORD size.
            if mem.size.map(|s| s.bits()).unwrap_or(32) == 16 {
                buf.push(0x66);
            }
            buf.push(0x8F);
            emit_mem_modrm(buf, 0, mem);
        }
        _ => return Err(invalid_operands("pop", "unsupported operand", instr.span)),
    }
    Ok(())
}
1173
1174// ─── REX / ModR/M / SIB helpers ──────────────────────────────
1175
/// Build a REX prefix byte (0b0100WRXB).
///
/// W = 64-bit operand size, R = ModRM.reg extension,
/// X = SIB.index extension, B = ModRM.rm / SIB.base / opcode-reg extension.
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn rex(w: bool, r: bool, x: bool, b: bool) -> u8 {
    // Branch-free assembly of the four flag bits onto the 0x40 base.
    0x40 | (u8::from(w) << 3) | (u8::from(r) << 2) | (u8::from(x) << 1) | u8::from(b)
}
1195
/// Whether a REX prefix with at least one flag is needed.
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn needs_rex(w: bool, r: bool, x: bool, b: bool) -> bool {
    // Any set flag forces the prefix; bitwise OR keeps this branch-free.
    w | r | x | b
}
1202
/// Build ModR/M byte: mod (2 bits) | reg (3 bits) | rm (3 bits).
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 {
    let reg_bits = (reg & 7) << 3;
    let rm_bits = rm & 7;
    (mod_ << 6) | reg_bits | rm_bits
}
1209
/// Build SIB byte: scale (2 bits) | index (3 bits) | base (3 bits).
///
/// Scale factors 1/2/4/8 map to SS values 0-3; any other value
/// degrades to scale 1 (SS = 0).
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn sib(scale: u8, index: u8, base: u8) -> u8 {
    // Encode the scale factor as its log2.
    let ss: u8 = match scale {
        2 => 1,
        4 => 2,
        8 => 3,
        _ => 0,
    };
    (ss << 6) | ((index & 7) << 3) | (base & 7)
}
1223
/// Get the operand size from a register as u8 (for GP registers only, panics for vector).
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn reg_size(reg: Register) -> u8 {
    let bits = reg.size_bits();
    // Widths above 128 would be truncated by the narrowing cast below.
    debug_assert!(
        bits <= 128,
        "reg_size() used on vector register wider than u8"
    );
    bits as u8
}
1232
/// Check if using a high-byte register (AH, BH, CH, DH) together with any operand
/// that requires a REX prefix.  On x86-64, a REX byte changes the meaning of
/// register codes 4-7 from AH/CH/DH/BH to SPL/BPL/SIL/DIL, so the two are
/// incompatible.  Returns an error if the conflict is detected.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn check_high_byte_rex_conflict(
    regs: &[Register],
    span: crate::error::Span,
) -> Result<(), AsmError> {
    // Without a high-byte register in the list there is nothing to conflict.
    if !regs.iter().any(|r| r.is_high_byte()) {
        return Ok(());
    }
    // A REX prefix is forced by extended registers, the uniform byte
    // registers (SPL/BPL/SIL/DIL), or a 64-bit operand.
    let rex_forced = regs
        .iter()
        .any(|r| r.is_extended() || r.requires_rex_for_byte() || r.size_bits() == 64);
    if rex_forced {
        return Err(AsmError::InvalidOperands {
            detail: String::from(
                "high-byte registers (AH, BH, CH, DH) cannot be used with REX-requiring operands (64-bit regs, extended regs R8-R15, SPL/BPL/SIL/DIL)"
            ),
            span,
        });
    }
    Ok(())
}
1256
/// Emit REX prefix if needed, then opcode + ModR/M for reg,reg.
///
/// `dst` lands in ModRM.rm and `src` in ModRM.reg — the r/m, r operand
/// order used by opcodes such as 0x88/0x89.
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_rr(
    buf: &mut InstrBytes,
    opcode: &[u8],
    dst: Register,
    src: Register,
    span: crate::error::Span,
) -> Result<(), AsmError> {
    let size = reg_size(dst);

    // Byte-sized operands: AH/BH/CH/DH cannot coexist with a REX prefix.
    if size == 8 {
        check_high_byte_rex_conflict(&[dst, src], span)?;
    }

    // Legacy operand-size override for 16-bit operands.
    if size == 16 {
        buf.push(0x66);
    }

    // REX: W for 64-bit size, R extends ModRM.reg (src), B extends
    // ModRM.rm (dst); SPL/BPL/SIL/DIL need an empty REX as well.
    let w = size == 64;
    let r = src.is_extended();
    let b = dst.is_extended();
    if needs_rex(w, r, false, b) || dst.requires_rex_for_byte() || src.requires_rex_for_byte() {
        buf.push(rex(w, r, false, b));
    }

    buf.extend_from_slice(opcode);
    buf.push(modrm(0b11, src.base_code(), dst.base_code()));
    Ok(())
}
1293
/// If the memory operand has a `disp_label`, create a relocation entry.
///
/// RIP-relative bases produce a PC-relative relocation, everything else
/// an absolute one.  `disp_offset` is where the disp32 field begins;
/// when `None`, `buf_len` is used instead.
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn set_mem_reloc(
    reloc: &mut Option<Relocation>,
    mem: &MemoryOperand,
    disp_offset: Option<usize>,
    buf_len: usize,
) {
    let Some(label) = mem.disp_label.as_ref() else {
        return;
    };
    let kind = if mem.base == Some(Register::Rip) {
        RelocKind::X86Relative
    } else {
        RelocKind::Absolute
    };
    *reloc = Some(Relocation {
        offset: disp_offset.unwrap_or(buf_len),
        size: 4,
        label: alloc::rc::Rc::from(&**label),
        kind,
        addend: mem.disp,
        trailing_bytes: 0, // updated by encode_x86_64 after instruction is complete
    });
}
1318
/// Emit ModR/M + SIB + displacement for a memory operand.
/// Returns the offset where a relocation displacement starts (if any).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn emit_mem_modrm(
    buf: &mut InstrBytes,
    reg_field: u8,
    mem: &MemoryOperand,
) -> Option<usize> {
    // NOTE: Segment override prefix is emitted in encode_x86_64 BEFORE the
    // REX/opcode bytes. Do NOT emit it here (after the opcode).

    let disp = mem.disp;

    match (mem.base, mem.index) {
        // RIP-relative addressing: [rip + disp32] — mod=00, r/m=101.
        (Some(Register::Rip), None) => {
            buf.push(modrm(0b00, reg_field, 0b101));
            let reloc_offset = buf.len();
            buf.extend_from_slice(&(disp as i32).to_le_bytes());
            Some(reloc_offset)
        }

        // Absolute address / displacement only: [disp32].
        // In 64-bit mode a SIB byte (index=100, base=101) encodes this.
        (None, None) => {
            buf.push(modrm(0b00, reg_field, 0b100));
            buf.push(sib(1, 0b100, 0b101));
            let reloc_offset = buf.len();
            buf.extend_from_slice(&(disp as i32).to_le_bytes());
            Some(reloc_offset)
        }

        // SIB index-only: [index*scale + disp32] — no base register.
        // Must use mod=00, SIB.base=101 ("no base, disp32 follows").
        (None, Some(idx_reg)) => {
            buf.push(modrm(0b00, reg_field, 0b100));
            buf.push(sib(mem.scale, idx_reg.base_code(), 0b101));
            let reloc_offset = buf.len();
            buf.extend_from_slice(&(disp as i32).to_le_bytes());
            Some(reloc_offset)
        }

        // Base register present (with or without index).
        (Some(base), index) => {
            // RSP/R12 (r/m code 100) always require a SIB byte; so does
            // any explicit index register.
            let need_sib = index.is_some() || base.base_code() == 4;

            // Pick the shortest displacement form.  RBP/R13 (code 101)
            // cannot use mod=00 — that slot means "disp32, no base".
            let (mod_bits, disp_size) = if disp == 0 && base.base_code() != 5 {
                (0b00, 0)
            } else if (-128..=127).contains(&disp) {
                (0b01, 1)
            } else {
                (0b10, 4)
            };

            if need_sib {
                // SIB.index=100 (the RSP slot) means "no index".
                let idx_reg = index.unwrap_or(Register::Rsp);
                buf.push(modrm(mod_bits, reg_field, 0b100));
                buf.push(sib(mem.scale, idx_reg.base_code(), base.base_code()));
            } else {
                buf.push(modrm(mod_bits, reg_field, base.base_code()));
            }

            let reloc_offset = if disp_size > 0 { Some(buf.len()) } else { None };
            match disp_size {
                1 => buf.push(disp as i8 as u8),
                4 => buf.extend_from_slice(&(disp as i32).to_le_bytes()),
                _ => {}
            }
            reloc_offset
        }
    }
}
1398
/// Emit REX prefix for a reg+mem operation.
///
/// Also emits the 0x66 operand-size override for 16-bit registers and
/// rejects high-byte registers combined with a REX-requiring base/index.
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn emit_rex_for_reg_mem(
    buf: &mut InstrBytes,
    reg: Register,
    mem: &MemoryOperand,
) -> Result<(), AsmError> {
    let size = reg.size_bits();
    let w = size == 64;
    let r = reg.is_extended();
    let x = mem.index.is_some_and(|idx| idx.is_extended());
    let b = mem.base.is_some_and(|base| base.is_extended());

    if size == 16 {
        buf.push(0x66);
    }

    // AH/BH/CH/DH cannot coexist with a REX prefix — the prefix remaps
    // their register codes to SPL/BPL/SIL/DIL.
    if size == 8 && reg.is_high_byte() && (x || b) {
        return Err(AsmError::InvalidOperands {
            detail: String::from(
                "high-byte registers (AH, BH, CH, DH) cannot be used with memory operands requiring REX prefix (R8-R15 base/index)"
            ),
            span: crate::error::Span { line: 0, col: 0, offset: 0, len: 0 },
        });
    }

    if needs_rex(w, r, x, b) || reg.requires_rex_for_byte() {
        buf.push(rex(w, r, x, b));
    }
    Ok(())
}
1435
/// Emit REX prefix for a /digit+mem operation (no separate reg operand).
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn emit_rex_for_digit_mem(buf: &mut InstrBytes, size: u8, mem: &MemoryOperand) {
    // Operand-size override first for 16-bit operations.
    if size == 16 {
        buf.push(0x66);
    }
    // Only W (64-bit) and the extended base/index bits can apply — there
    // is no reg operand, so R stays clear.
    let w = size == 64;
    let x = mem.index.is_some_and(|idx| idx.is_extended());
    let b = mem.base.is_some_and(|base| base.is_extended());
    if needs_rex(w, false, x, b) {
        buf.push(rex(w, false, x, b));
    }
}
1451
1452// ─── Instruction encoders ─────────────────────────────────────
1453
/// Encode the single-byte NOP (0x90).  No operand forms exist.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_nop(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    if !ops.is_empty() {
        return Err(AsmError::InvalidOperands {
            detail: String::from("nop takes no operands"),
            span: instr.span,
        });
    }
    buf.push(0x90);
    Ok(())
}
1470
/// Encode a multi-byte NOP (`nop2` … `nop9`).
///
/// The byte count is parsed from the mnemonic suffix (e.g. "nop5" → 5).
/// Unparsable or out-of-table counts fall back to the single-byte 0x90.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_multibyte_nop(buf: &mut InstrBytes, mnemonic: &str) -> Result<(), AsmError> {
    // `get(3..)` instead of `[3..]`: a mnemonic shorter than 3 bytes (or a
    // slice off a char boundary) must not panic the encoder — it degrades
    // to the single-byte NOP fallback instead.
    let n: usize = mnemonic
        .get(3..)
        .and_then(|suffix| suffix.parse().ok())
        .unwrap_or(1);
    // Intel recommended multi-byte NOP sequences
    match n {
        2 => buf.extend_from_slice(&[0x66, 0x90]),
        3 => buf.extend_from_slice(&[0x0F, 0x1F, 0x00]),
        4 => buf.extend_from_slice(&[0x0F, 0x1F, 0x40, 0x00]),
        5 => buf.extend_from_slice(&[0x0F, 0x1F, 0x44, 0x00, 0x00]),
        6 => buf.extend_from_slice(&[0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00]),
        7 => buf.extend_from_slice(&[0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00]),
        8 => buf.extend_from_slice(&[0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]),
        9 => buf.extend_from_slice(&[0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]),
        _ => buf.push(0x90),
    }
    Ok(())
}
1488
/// Encode `int imm8`.
///
/// NOTE(review): `int 3` is emitted as the one-byte 0xCC breakpoint form
/// rather than CD 03 — this matches the original implementation's
/// deliberate choice (comment preserved from the original).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_int(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    match ops {
        // INT 3 → single byte
        [Operand::Immediate(3)] => buf.push(0xCC),
        [Operand::Immediate(n)] if (0..=255).contains(n) => {
            buf.push(0xCD);
            buf.push(*n as u8);
        }
        [_] => {
            return Err(invalid_operands(
                "int",
                "expected immediate 0-255",
                instr.span,
            ))
        }
        _ => {
            return Err(invalid_operands(
                "int",
                "expected one immediate operand",
                instr.span,
            ))
        }
    }
    Ok(())
}
1518
/// Encode near return: `ret` (0xC3) or `ret imm16` (0xC2, pops imm16
/// extra stack bytes after returning).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_ret(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    match ops {
        // Plain near return
        [] => buf.push(0xC3),
        // Near return with stack pop
        [Operand::Immediate(n)] if (0..=65535).contains(n) => {
            buf.push(0xC2);
            buf.extend_from_slice(&(*n as u16).to_le_bytes());
        }
        [_] => {
            return Err(invalid_operands(
                "ret",
                "expected immediate 0-65535",
                instr.span,
            ))
        }
        _ => {
            return Err(invalid_operands(
                "ret",
                "expected 0 or 1 operands",
                instr.span,
            ))
        }
    }
    Ok(())
}
1550
/// Encode far return: `retf` (0xCB) or `retf imm16` (0xCA, pops imm16
/// extra stack bytes after returning).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_retf(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    match ops {
        // Plain far return
        [] => buf.push(0xCB),
        // Far return with stack pop
        [Operand::Immediate(n)] if (0..=65535).contains(n) => {
            buf.push(0xCA);
            buf.extend_from_slice(&(*n as u16).to_le_bytes());
        }
        [_] => {
            return Err(invalid_operands(
                "retf",
                "expected immediate 0-65535",
                instr.span,
            ))
        }
        _ => {
            return Err(invalid_operands(
                "retf",
                "expected 0 or 1 operands",
                instr.span,
            ))
        }
    }
    Ok(())
}
1582
/// Encode `mov` in its register/immediate/memory/label forms.
///
/// Handled combinations: r,r · r,imm · r64,label (movabs with an absolute
/// relocation) · r,[mem] · [mem],r · [mem],imm.  Anything else is an
/// operand error.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_mov(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands("mov", "expected 2 operands", instr.span));
    }

    match (&ops[0], &ops[1]) {
        // mov r, r — 0x88 (8-bit) / 0x89 (wider); dst in ModRM.rm.
        (Operand::Register(dst), Operand::Register(src)) => {
            let size = reg_size(*dst);
            if size != reg_size(*src) {
                return Err(invalid_operands("mov", "operand size mismatch", instr.span));
            }
            let opcode = if size == 8 {
                &[0x88u8] as &[u8]
            } else {
                &[0x89u8]
            };
            emit_rr(buf, opcode, *dst, *src, instr.span)?;
        }

        // mov r, imm
        (Operand::Register(dst), Operand::Immediate(imm)) => {
            encode_mov_reg_imm(buf, *dst, *imm, instr.span)?;
        }

        // mov r, label  /  mov r, label+offset
        // Emitted as movabs r64, imm64 with an absolute relocation.
        (Operand::Register(dst), op @ (Operand::Label(_) | Operand::Expression(_))) => {
            let Some((label, addend)) = extract_label(op) else {
                return Err(invalid_operands(
                    "mov",
                    "expected label expression",
                    instr.span,
                ));
            };
            if reg_size(*dst) != 64 {
                return Err(invalid_operands(
                    "mov",
                    "label operand requires 64-bit register",
                    instr.span,
                ));
            }
            buf.push(rex(true, false, false, dst.is_extended()));
            buf.push(0xB8 + dst.base_code());
            let reloc_off = buf.len();
            buf.extend_from_slice(&0u64.to_le_bytes());
            *reloc = Some(Relocation {
                offset: reloc_off,
                size: 8,
                label: alloc::rc::Rc::from(label),
                kind: RelocKind::Absolute,
                addend,
                trailing_bytes: 0,
            });
        }

        // mov r, [mem] — 0x8A (8-bit) / 0x8B.
        (Operand::Register(dst), Operand::Memory(mem)) => {
            let opcode: u8 = if reg_size(*dst) == 8 { 0x8A } else { 0x8B };
            emit_rex_for_reg_mem(buf, *dst, mem)?;
            buf.push(opcode);
            let disp_off = emit_mem_modrm(buf, dst.base_code(), mem);
            // Consistency fix: use the shared helper instead of hand-building
            // the relocation here — it is the same logic the [mem],r arm
            // below already delegates to.
            set_mem_reloc(reloc, mem, disp_off, buf.len());
        }

        // mov [mem], r — 0x88 (8-bit) / 0x89.
        (Operand::Memory(mem), Operand::Register(src)) => {
            let opcode: u8 = if reg_size(*src) == 8 { 0x88 } else { 0x89 };
            emit_rex_for_reg_mem(buf, *src, mem)?;
            buf.push(opcode);
            let disp_off = emit_mem_modrm(buf, src.base_code(), mem);
            set_mem_reloc(reloc, mem, disp_off, buf.len());
        }

        // mov [mem], imm — C6 /0 (8-bit) / C7 /0 (up to imm32).
        (Operand::Memory(mem), Operand::Immediate(imm)) => {
            // Operand size: explicit instruction size hint wins, then the
            // memory operand's annotation, then the 32-bit default.
            let size = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            // mov r/m64, imm32 sign-extends — reject values that don't fit
            if size == 64 {
                let v = *imm;
                if v > i64::from(i32::MAX) as i128 || v < i64::from(i32::MIN) as i128 {
                    return Err(invalid_operands(
                        "mov",
                        "immediate too large for mov [mem], imm (max sign-extended imm32); use mov reg, imm64 + mov [mem], reg",
                        instr.span,
                    ));
                }
            }
            let opcode: u8 = if size == 8 { 0xC6 } else { 0xC7 };
            emit_rex_for_digit_mem(buf, size, mem);
            buf.push(opcode);
            let disp_off = emit_mem_modrm(buf, 0, mem); // /0
            set_mem_reloc(reloc, mem, disp_off, buf.len());
            emit_imm(buf, *imm, if size > 32 { 32 } else { size }); // max imm32 for mov r/m, imm
        }

        _ => {
            return Err(invalid_operands(
                "mov",
                "unsupported operand combination",
                instr.span,
            ))
        }
    }
    Ok(())
}
1718
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_mov_reg_imm(
    buf: &mut InstrBytes,
    dst: Register,
    imm: i128,
    span: Span,
) -> Result<(), AsmError> {
    // Encode `mov reg, imm`, selecting the shortest legal form for the
    // destination width.  For 64-bit registers the 10-byte `movabs` is only
    // used when the value fits neither zero-extended imm32 (B8+r) nor
    // sign-extended imm32 (C7 /0).
    let bits = reg_size(dst);
    let rex_b = dst.is_extended();

    match bits {
        8 => {
            // B0+r ib — REX needed for r8b..r15b and for spl/bpl/sil/dil.
            if rex_b || dst.requires_rex_for_byte() {
                buf.push(rex(false, false, false, rex_b));
            }
            buf.push(0xB0 + dst.base_code());
            buf.push(imm as u8);
        }
        16 => {
            // 66 [REX.B] B8+r iw
            buf.push(0x66);
            if rex_b {
                buf.push(rex(false, false, false, rex_b));
            }
            buf.push(0xB8 + dst.base_code());
            buf.extend_from_slice(&(imm as u16).to_le_bytes());
        }
        32 => {
            // [REX.B] B8+r id
            if rex_b {
                buf.push(rex(false, false, false, rex_b));
            }
            buf.push(0xB8 + dst.base_code());
            buf.extend_from_slice(&(imm as u32).to_le_bytes());
        }
        64 => {
            if (0..=i128::from(u32::MAX)).contains(&imm) {
                // mov r32, imm32 — writing the low 32 bits zero-extends
                // into the full 64-bit register, so no REX.W is needed.
                if rex_b {
                    buf.push(rex(false, false, false, true));
                }
                buf.push(0xB8 + dst.base_code());
                buf.extend_from_slice(&(imm as u32).to_le_bytes());
            } else if (i128::from(i32::MIN)..=i128::from(i32::MAX)).contains(&imm) {
                // REX.W C7 /0 id — imm32 sign-extended to 64 bits.
                buf.push(rex(true, false, false, rex_b));
                buf.push(0xC7);
                buf.push(modrm(0b11, 0, dst.base_code()));
                buf.extend_from_slice(&(imm as i32).to_le_bytes());
            } else {
                // movabs r64, imm64 — full 8-byte immediate.
                buf.push(rex(true, false, false, rex_b));
                buf.push(0xB8 + dst.base_code());
                buf.extend_from_slice(&(imm as u64).to_le_bytes());
            }
        }
        _ => {
            return Err(AsmError::InvalidOperands {
                detail: String::from("unsupported register size for mov immediate"),
                span,
            });
        }
    }
    Ok(())
}
1787
/// Encode `lea reg, [mem]` (opcode 8D /r).
///
/// LEA materializes the effective address of `mem` into `reg` without
/// performing a memory access.  When the memory operand carries a label
/// displacement, a 4-byte relocation is recorded: PC-relative for
/// RIP-based addressing, absolute otherwise.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_lea(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands("lea", "expected 2 operands", instr.span));
    }
    match (&ops[0], &ops[1]) {
        (Operand::Register(dst), Operand::Memory(mem)) => {
            emit_rex_for_reg_mem(buf, *dst, mem)?;
            buf.push(0x8D);
            // Offset of the 4-byte displacement field inside the
            // instruction, or None if the addressing form emitted no disp32.
            let reloc_off = emit_mem_modrm(buf, dst.base_code(), mem);
            if let Some(ref label) = mem.disp_label {
                *reloc = Some(Relocation {
                    // NOTE(review): when reloc_off is None the fallback
                    // points at the current end of the buffer — presumably
                    // emit_mem_modrm always emits a disp field for
                    // label-bearing operands; confirm against its impl.
                    offset: reloc_off.unwrap_or(buf.len()),
                    size: 4,
                    label: alloc::rc::Rc::from(&**label),
                    // RIP-relative addressing needs a PC-relative fixup;
                    // any other base gets the absolute address patched in.
                    kind: if mem.base == Some(Register::Rip) {
                        RelocKind::X86Relative
                    } else {
                        RelocKind::Absolute
                    },
                    addend: mem.disp,
                    trailing_bytes: 0,
                });
            }
        }
        _ => return Err(invalid_operands("lea", "expected reg, [mem]", instr.span)),
    }
    Ok(())
}
1822
/// Encode PUSH: register (including FS/GS), immediate, memory, or label.
///
/// In 64-bit mode `push` defaults to a 64-bit operand: 16-bit pushes take a
/// 0x66 prefix, and 8/32-bit registers are rejected.  Immediates use the
/// shortest of `6A ib` (sign-extended imm8) or `68 id` (sign-extended imm32).
///
/// Fix: the imm32 range is now `i32::MIN..=i32::MAX` (was `..=u32::MAX`).
/// `68 id` sign-extends its immediate to 64 bits, so values in
/// `(i32::MAX, u32::MAX]` would previously be encoded as a *different*
/// 64-bit value (e.g. `push 0xFFFFFFFF` pushed -1).  This mirrors the
/// explicit range check already done for `mov [mem], imm`.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_push(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    if ops.len() != 1 {
        return Err(invalid_operands("push", "expected 1 operand", instr.span));
    }
    match &ops[0] {
        Operand::Register(reg) => {
            let size = reg.size_bits();
            // Segment register push (only FS and GS in 64-bit mode)
            match reg {
                Register::Fs => {
                    buf.push(0x0F);
                    buf.push(0xA0);
                    return Ok(());
                }
                Register::Gs => {
                    buf.push(0x0F);
                    buf.push(0xA8);
                    return Ok(());
                }
                Register::Cs | Register::Ds | Register::Es | Register::Ss => {
                    return Err(invalid_operands(
                        "push",
                        "CS/DS/ES/SS push not valid in 64-bit mode",
                        instr.span,
                    ));
                }
                _ => {}
            }
            if size == 8 || size == 32 {
                return Err(invalid_operands(
                    "push",
                    "push requires 64-bit or 16-bit register in 64-bit mode",
                    instr.span,
                ));
            }
            let b = reg.is_extended();
            if size == 16 {
                buf.push(0x66); // operand-size override for 16-bit push
            }
            if b {
                buf.push(rex(false, false, false, true));
            }
            buf.push(0x50 + reg.base_code());
        }
        Operand::Immediate(imm) => {
            if *imm >= i8::MIN as i128 && *imm <= i8::MAX as i128 {
                // 6A ib — imm8, sign-extended to 64 bits by the CPU.
                buf.push(0x6A);
                buf.push(*imm as u8);
            } else if *imm >= i32::MIN as i128 && *imm <= i32::MAX as i128 {
                // 68 id — imm32, sign-extended to 64 bits by the CPU.
                buf.push(0x68);
                buf.extend_from_slice(&(*imm as i32).to_le_bytes());
            } else {
                return Err(invalid_operands(
                    "push",
                    "immediate value out of range for push (must fit in sign-extended 32 bits)",
                    instr.span,
                ));
            }
        }
        Operand::Memory(mem) => {
            // push defaults to 64-bit operand size — REX.W is redundant.
            // Still need REX.B/X for extended base/index registers.
            emit_rex_for_digit_mem(buf, 0, mem);
            buf.push(0xFF);
            emit_mem_modrm(buf, 6, mem); // /6
        }
        op @ (Operand::Label(_) | Operand::Expression(_)) => {
            let Some((label, addend)) = extract_label(op) else {
                return Err(invalid_operands("push", "unsupported operand", instr.span));
            };
            // push imm32 with a 4-byte absolute relocation for the label.
            buf.push(0x68);
            let reloc_off = buf.len();
            buf.extend_from_slice(&0i32.to_le_bytes());
            *reloc = Some(Relocation {
                offset: reloc_off,
                size: 4,
                label: alloc::rc::Rc::from(label),
                kind: RelocKind::Absolute,
                addend,
                trailing_bytes: 0,
            });
        }
        _ => return Err(invalid_operands("push", "unsupported operand", instr.span)),
    }
    Ok(())
}
1916
/// Encode POP: 58+r for registers, 0F A1/A9 for FS/GS, 8F /0 for memory.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_pop(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    let [op] = ops else {
        return Err(invalid_operands("pop", "expected 1 operand", instr.span));
    };
    match op {
        Operand::Register(reg) => {
            // FS/GS have dedicated two-byte opcodes; the remaining segment
            // registers cannot be popped in 64-bit mode.
            match reg {
                Register::Fs => {
                    buf.extend_from_slice(&[0x0F, 0xA1]);
                    return Ok(());
                }
                Register::Gs => {
                    buf.extend_from_slice(&[0x0F, 0xA9]);
                    return Ok(());
                }
                Register::Cs | Register::Ds | Register::Es | Register::Ss => {
                    return Err(invalid_operands(
                        "pop",
                        "CS/DS/ES/SS pop not valid in 64-bit mode",
                        instr.span,
                    ));
                }
                _ => {}
            }
            let bits = reg.size_bits();
            if matches!(bits, 8 | 32) {
                return Err(invalid_operands(
                    "pop",
                    "pop requires 64-bit or 16-bit register in 64-bit mode",
                    instr.span,
                ));
            }
            if bits == 16 {
                buf.push(0x66); // operand-size override for 16-bit pop
            }
            if reg.is_extended() {
                buf.push(rex(false, false, false, true));
            }
            buf.push(0x58 + reg.base_code());
        }
        Operand::Memory(mem) => {
            // 8F /0 — pop defaults to 64-bit operand size, so only REX.B/X
            // (extended base/index registers) can be required.
            emit_rex_for_digit_mem(buf, 0, mem);
            buf.push(0x8F);
            emit_mem_modrm(buf, 0, mem);
        }
        _ => return Err(invalid_operands("pop", "unsupported operand", instr.span)),
    }
    Ok(())
}
1976
/// Encode ALU instructions: add/or/adc/sbb/and/sub/xor/cmp.
/// `alu_num` is 0=add, 1=or, 2=adc, 3=sbb, 4=and, 5=sub, 6=xor, 7=cmp.
///
/// The eight classic ALU operations share one opcode layout: `alu_num * 8`
/// is the `r/m8, r8` form, and offsets +1/+2/+3 select the wider and
/// direction-reversed variants.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_alu(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    alu_num: u8,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 2 operands",
            instr.span,
        ));
    }

    let base = alu_num * 8;
    match (&ops[0], &ops[1]) {
        // r, r — opcode base (+1 for non-byte widths), direction r/m ← r.
        (Operand::Register(dst), Operand::Register(src)) => {
            let op = if reg_size(*dst) == 8 { base } else { base + 1 };
            emit_rr(buf, &[op], *dst, *src, instr.span)?;
        }

        // r, imm — delegated; the helper also covers the AL/AX/EAX/RAX
        // accumulator short forms.
        (Operand::Register(dst), Operand::Immediate(imm)) => {
            encode_alu_reg_imm(buf, *dst, *imm, alu_num)?;
        }

        // r, [mem] — base+2/+3, direction r ← r/m.
        (Operand::Register(dst), Operand::Memory(mem)) => {
            let op = if reg_size(*dst) == 8 { base + 2 } else { base + 3 };
            emit_rex_for_reg_mem(buf, *dst, mem)?;
            buf.push(op);
            let disp_off = emit_mem_modrm(buf, dst.base_code(), mem);
            set_mem_reloc(reloc, mem, disp_off, buf.len());
        }

        // [mem], r — base/+1, direction r/m ← r.
        (Operand::Memory(mem), Operand::Register(src)) => {
            let op = if reg_size(*src) == 8 { base } else { base + 1 };
            emit_rex_for_reg_mem(buf, *src, mem)?;
            buf.push(op);
            let disp_off = emit_mem_modrm(buf, src.base_code(), mem);
            set_mem_reloc(reloc, mem, disp_off, buf.len());
        }

        // [mem], imm — 80/83/81 with /alu_num; width comes from the size
        // hint, then the memory operand's size, then a 32-bit default.
        (Operand::Memory(mem), Operand::Immediate(imm)) => {
            let width = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            let disp_off = encode_alu_mem_imm(buf, mem, *imm, alu_num, width)?;
            set_mem_reloc(reloc, mem, disp_off, buf.len());
        }

        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "unsupported operand combination",
                instr.span,
            ));
        }
    }
    Ok(())
}
2062
/// Encode the `reg, imm` forms of the eight classic ALU operations.
///
/// Picks the shortest encoding: accumulator short form for AL, 80 ib for
/// other byte registers, 83 ib when the immediate fits a sign-extended
/// imm8, else 81 id (or the accumulator `base+5` short form) with a full
/// immediate capped at 32 bits.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_alu_reg_imm(
    buf: &mut InstrBytes,
    dst: Register,
    imm: i128,
    alu_num: u8,
) -> Result<(), AsmError> {
    let bits = reg_size(dst);
    let is_accumulator = dst.base_code() == 0 && !dst.is_extended();
    let fits_i8 = i8::try_from(imm).is_ok();

    // Shortest form: `<op> al, imm8` — no ModR/M byte at all.
    if is_accumulator && bits == 8 {
        buf.push(alu_num * 8 + 4);
        buf.push(imm as u8);
        return Ok(());
    }

    // 80 /alu_num ib for the remaining byte registers.
    if bits == 8 {
        let rex_b = dst.is_extended();
        if rex_b || dst.requires_rex_for_byte() {
            buf.push(rex(false, false, false, rex_b));
        }
        buf.push(0x80);
        buf.push(modrm(0b11, alu_num, dst.base_code()));
        buf.push(imm as u8);
        return Ok(());
    }

    // 16/32/64-bit: emit prefixes once, then choose the immediate form.
    let w = bits == 64;
    let rex_b = dst.is_extended();
    if bits == 16 {
        buf.push(0x66); // operand-size override
    }
    if needs_rex(w, false, false, rex_b) {
        buf.push(rex(w, false, false, rex_b));
    }

    if fits_i8 {
        // 83 /alu_num ib — imm8 sign-extended to the operand size.
        buf.push(0x83);
        buf.push(modrm(0b11, alu_num, dst.base_code()));
        buf.push(imm as i8 as u8);
    } else if is_accumulator {
        // Accumulator short form: <base>+5 with a full immediate.
        buf.push(alu_num * 8 + 5);
        emit_imm(buf, imm, if bits > 32 { 32 } else { bits });
    } else {
        // 81 /alu_num — full immediate (imm32 max; sign-extended on r64).
        buf.push(0x81);
        buf.push(modrm(0b11, alu_num, dst.base_code()));
        emit_imm(buf, imm, if bits > 32 { 32 } else { bits });
    }
    Ok(())
}
2124
/// Encode the `[mem], imm` forms of the eight classic ALU operations.
///
/// Chooses 80 ib (byte operand), 83 ib (sign-extended imm8), or 81 with a
/// full immediate capped at 32 bits.  Returns the offset of the
/// displacement field (if one was emitted) so the caller can attach a
/// relocation.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_alu_mem_imm(
    buf: &mut InstrBytes,
    mem: &MemoryOperand,
    imm: i128,
    alu_num: u8,
    size: u8,
) -> Result<Option<usize>, AsmError> {
    emit_rex_for_digit_mem(buf, size, mem);

    let imm_fits_i8 = i8::try_from(imm).is_ok();
    let opcode: u8 = if size == 8 {
        0x80
    } else if imm_fits_i8 {
        0x83
    } else {
        0x81
    };
    buf.push(opcode);

    let disp_off = emit_mem_modrm(buf, alu_num, mem);

    if size == 8 {
        buf.push(imm as u8);
    } else if imm_fits_i8 {
        buf.push(imm as i8 as u8);
    } else {
        // Full immediate — imm32 max for mov-style r/m encodings.
        emit_imm(buf, imm, if size > 32 { 32 } else { size });
    }
    Ok(disp_off)
}
2154
/// Encode TEST (logical AND that only sets flags; the result is discarded).
///
/// Supported forms: `r/m, r` (84/85 /r), `r, imm` (A8/A9 accumulator short
/// forms or F6/F7 /0), and `[mem], imm`.
///
/// Improvement: `test reg, [mem]` is now accepted.  TEST has no dedicated
/// `r, r/m` opcode, but it is commutative, so the operands are swapped and
/// encoded as `test [mem], reg` — the same bytes GNU as and NASM emit.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_test(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands("test", "expected 2 operands", instr.span));
    }
    match (&ops[0], &ops[1]) {
        (Operand::Register(dst), Operand::Register(src)) => {
            let size = reg_size(*dst);
            let opcode = if size == 8 { 0x84u8 } else { 0x85u8 };
            emit_rr(buf, &[opcode], *dst, *src, instr.span)?;
        }
        (Operand::Register(dst), Operand::Immediate(imm)) => {
            let size = reg_size(*dst);
            // Short form for AL/AX/EAX/RAX (no ModR/M byte).
            if dst.base_code() == 0 && !dst.is_extended() && size == 8 {
                buf.push(0xA8);
                buf.push(*imm as u8);
            } else if dst.base_code() == 0 && !dst.is_extended() && size > 8 {
                let w = size == 64;
                if size == 16 {
                    buf.push(0x66);
                }
                if w {
                    buf.push(rex(true, false, false, false));
                }
                buf.push(0xA9);
                let imm_size = if size > 32 { 32 } else { size };
                emit_imm(buf, *imm, imm_size);
            } else {
                // General form: F6/F7 /0 with an immediate (imm32 max,
                // sign-extended for 64-bit operands).
                let w = size == 64;
                let b = dst.is_extended();
                if size == 16 {
                    buf.push(0x66);
                }
                let need = needs_rex(w, false, false, b) || dst.requires_rex_for_byte();
                if need {
                    buf.push(rex(w, false, false, b));
                }
                buf.push(if size == 8 { 0xF6 } else { 0xF7 });
                buf.push(modrm(0b11, 0, dst.base_code()));
                let imm_size = if size == 8 {
                    8
                } else if size > 32 {
                    32
                } else {
                    size
                };
                emit_imm(buf, *imm, imm_size);
            }
        }
        // r/m, r — also handles `test reg, [mem]`, which is encoded as the
        // byte-identical `test [mem], reg` because TEST is commutative.
        (Operand::Memory(mem), Operand::Register(src))
        | (Operand::Register(src), Operand::Memory(mem)) => {
            let size = reg_size(*src);
            let opcode = if size == 8 { 0x84u8 } else { 0x85u8 };
            emit_rex_for_reg_mem(buf, *src, mem)?;
            buf.push(opcode);
            let disp_off = emit_mem_modrm(buf, src.base_code(), mem);
            set_mem_reloc(reloc, mem, disp_off, buf.len());
        }
        (Operand::Memory(mem), Operand::Immediate(imm)) => {
            // Operand width: size hint, then the memory operand's size
            // annotation, then a 32-bit default.
            let size = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            let opcode = if size == 8 { 0xF6u8 } else { 0xF7u8 };
            emit_rex_for_digit_mem(buf, size, mem);
            buf.push(opcode);
            let disp_off = emit_mem_modrm(buf, 0, mem); // /0
            let imm_size = if size == 8 {
                8
            } else if size > 32 {
                32
            } else {
                size
            };
            emit_imm(buf, *imm, imm_size);
            set_mem_reloc(reloc, mem, disp_off, buf.len());
        }
        _ => {
            return Err(invalid_operands(
                "test",
                "unsupported operand combination",
                instr.span,
            ))
        }
    }
    Ok(())
}
2248
/// Encode unary instructions: NOT, NEG, MUL, DIV, IDIV.
///
/// All five share opcodes F6 (byte) / F7 (wider); `digit` selects the
/// operation via the ModR/M reg field.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_unary(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    digit: u8,
) -> Result<(), AsmError> {
    let [op] = ops else {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 1 operand",
            instr.span,
        ));
    };
    match op {
        Operand::Register(reg) => {
            let bits = reg_size(*reg);
            let w = bits == 64;
            let rex_b = reg.is_extended();
            if bits == 16 {
                buf.push(0x66); // operand-size override
            }
            // REX for 64-bit width, extended registers, or spl/bpl/sil/dil.
            if needs_rex(w, false, false, rex_b) || reg.requires_rex_for_byte() {
                buf.push(rex(w, false, false, rex_b));
            }
            buf.push(if bits == 8 { 0xF6 } else { 0xF7 });
            buf.push(modrm(0b11, digit, reg.base_code()));
        }
        Operand::Memory(mem) => {
            // Operand width: size hint, then the memory operand's size
            // annotation, then a 32-bit default.
            let bits = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            emit_rex_for_digit_mem(buf, bits, mem);
            buf.push(if bits == 8 { 0xF6 } else { 0xF7 });
            emit_mem_modrm(buf, digit, mem);
        }
        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "expected register or memory operand",
                instr.span,
            ))
        }
    }
    Ok(())
}
2299
/// Encode IMUL in its one-, two-, and three-operand forms.
///
/// * 1 operand:  F7 /5 — widening multiply by the accumulator (delegated
///   to [`encode_unary`]).
/// * 2 operands: 0F AF /r — `r = r * r/m`.
/// * 3 operands: 6B /r ib (sign-extended imm8) or 69 /r id — `r = r/m * imm`.
///
/// The 2/3-operand forms have no 8-bit variant, so byte registers are
/// rejected there.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_imul(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    match ops.len() {
        1 => {
            // One-operand IMUL: EDX:EAX = EAX * r/m
            encode_unary(buf, ops, instr, 5)
        }
        2 => {
            // Two-operand IMUL: r = r * r/m
            match (&ops[0], &ops[1]) {
                (Operand::Register(dst), Operand::Register(src)) => {
                    let size = reg_size(*dst);
                    if size == 8 {
                        return Err(invalid_operands(
                            "imul",
                            "8-bit operands not supported for 2/3-operand IMUL",
                            instr.span,
                        ));
                    }
                    // 0F AF /r: dst goes in the ModR/M reg field (REX.R),
                    // src in the r/m field (REX.B).
                    let w = size == 64;
                    let r = dst.is_extended();
                    let b = src.is_extended();
                    if size == 16 {
                        buf.push(0x66);
                    }
                    if needs_rex(w, r, false, b) {
                        buf.push(rex(w, r, false, b));
                    }
                    buf.push(0x0F);
                    buf.push(0xAF);
                    buf.push(modrm(0b11, dst.base_code(), src.base_code()));
                }
                (Operand::Register(dst), Operand::Memory(mem)) => {
                    if reg_size(*dst) == 8 {
                        return Err(invalid_operands(
                            "imul",
                            "8-bit operands not supported for 2/3-operand IMUL",
                            instr.span,
                        ));
                    }
                    emit_rex_for_reg_mem(buf, *dst, mem)?;
                    buf.push(0x0F);
                    buf.push(0xAF);
                    emit_mem_modrm(buf, dst.base_code(), mem);
                }
                _ => {
                    return Err(invalid_operands(
                        "imul",
                        "unsupported operand combination",
                        instr.span,
                    ))
                }
            }
            Ok(())
        }
        3 => {
            // Three-operand IMUL: r = r/m * imm
            match (&ops[0], &ops[1], &ops[2]) {
                (Operand::Register(dst), Operand::Register(src), Operand::Immediate(imm)) => {
                    let size = reg_size(*dst);
                    if size == 8 {
                        return Err(invalid_operands(
                            "imul",
                            "8-bit operands not supported for 2/3-operand IMUL",
                            instr.span,
                        ));
                    }
                    let w = size == 64;
                    let r = dst.is_extended();
                    let b = src.is_extended();
                    if size == 16 {
                        buf.push(0x66);
                    }
                    if needs_rex(w, r, false, b) {
                        buf.push(rex(w, r, false, b));
                    }

                    // 6B = sign-extended imm8; 69 = full immediate
                    // (capped at imm32, sign-extended for 64-bit operands).
                    if *imm >= i8::MIN as i128 && *imm <= i8::MAX as i128 {
                        buf.push(0x6B);
                        buf.push(modrm(0b11, dst.base_code(), src.base_code()));
                        buf.push(*imm as i8 as u8);
                    } else {
                        buf.push(0x69);
                        buf.push(modrm(0b11, dst.base_code(), src.base_code()));
                        let imm_size = if size > 32 { 32 } else { size };
                        emit_imm(buf, *imm, imm_size);
                    }
                }
                (Operand::Register(dst), Operand::Memory(mem), Operand::Immediate(imm)) => {
                    let size = reg_size(*dst);
                    if size == 8 {
                        return Err(invalid_operands(
                            "imul",
                            "8-bit operands not supported for 2/3-operand IMUL",
                            instr.span,
                        ));
                    }
                    emit_rex_for_reg_mem(buf, *dst, mem)?;

                    // Same 6B/69 immediate-size selection as the reg form.
                    if *imm >= i8::MIN as i128 && *imm <= i8::MAX as i128 {
                        buf.push(0x6B);
                        emit_mem_modrm(buf, dst.base_code(), mem);
                        buf.push(*imm as i8 as u8);
                    } else {
                        buf.push(0x69);
                        emit_mem_modrm(buf, dst.base_code(), mem);
                        let imm_size = if size > 32 { 32 } else { size };
                        emit_imm(buf, *imm, imm_size);
                    }
                }
                _ => {
                    return Err(invalid_operands(
                        "imul",
                        "expected reg, r/m, imm",
                        instr.span,
                    ))
                }
            }
            Ok(())
        }
        _ => Err(invalid_operands(
            "imul",
            "expected 1-3 operands",
            instr.span,
        )),
    }
}
2431
/// Encode INC/DEC on a register or memory operand.
///
/// Both share opcodes FE (byte) / FF (wider); `digit` selects the
/// operation via the ModR/M reg field.  (In 64-bit mode the short 40+r
/// forms are repurposed as REX prefixes, so only this encoding exists.)
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_inc_dec(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    digit: u8,
) -> Result<(), AsmError> {
    let [op] = ops else {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 1 operand",
            instr.span,
        ));
    };
    match op {
        Operand::Register(reg) => {
            let bits = reg_size(*reg);
            let w = bits == 64;
            let rex_b = reg.is_extended();
            if bits == 16 {
                buf.push(0x66); // operand-size override
            }
            // REX for 64-bit width, extended registers, or spl/bpl/sil/dil.
            if needs_rex(w, false, false, rex_b) || reg.requires_rex_for_byte() {
                buf.push(rex(w, false, false, rex_b));
            }
            buf.push(if bits == 8 { 0xFE } else { 0xFF });
            buf.push(modrm(0b11, digit, reg.base_code()));
        }
        Operand::Memory(mem) => {
            // Operand width: size hint, then the memory operand's size
            // annotation, then a 32-bit default.
            let bits = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            emit_rex_for_digit_mem(buf, bits, mem);
            buf.push(if bits == 8 { 0xFE } else { 0xFF });
            emit_mem_modrm(buf, digit, mem);
        }
        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "expected register or memory",
                instr.span,
            ))
        }
    }
    Ok(())
}
2481
/// Encode the shift/rotate group (x86 group 2): `digit` selects the
/// operation via the ModR/M reg field.
///
/// Forms: D0/D1 (shift by 1), C0/C1 (shift by imm8), D2/D3 (shift by CL) —
/// the even opcode of each pair is the byte-operand variant.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_shift(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    digit: u8,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 2 operands",
            instr.span,
        ));
    }
    match (&ops[0], &ops[1]) {
        // reg, 1 — dedicated shift-by-one opcodes D0/D1 (no imm byte).
        (Operand::Register(dst), Operand::Immediate(1)) => {
            let size = reg_size(*dst);
            let w = size == 64;
            let b = dst.is_extended();
            if size == 16 {
                buf.push(0x66);
            }
            let need = needs_rex(w, false, false, b) || dst.requires_rex_for_byte();
            if need {
                buf.push(rex(w, false, false, b));
            }
            buf.push(if size == 8 { 0xD0 } else { 0xD1 });
            buf.push(modrm(0b11, digit, dst.base_code()));
        }
        // reg, imm8 — C0/C1 /digit ib.
        (Operand::Register(dst), Operand::Immediate(imm)) => {
            let size = reg_size(*dst);
            let w = size == 64;
            let b = dst.is_extended();
            if size == 16 {
                buf.push(0x66);
            }
            let need = needs_rex(w, false, false, b) || dst.requires_rex_for_byte();
            if need {
                buf.push(rex(w, false, false, b));
            }
            buf.push(if size == 8 { 0xC0 } else { 0xC1 });
            buf.push(modrm(0b11, digit, dst.base_code()));
            buf.push(*imm as u8);
        }
        // reg, cl — D2/D3 /digit (count taken from CL at runtime).
        (Operand::Register(dst), Operand::Register(Register::Cl)) => {
            let size = reg_size(*dst);
            let w = size == 64;
            let b = dst.is_extended();
            if size == 16 {
                buf.push(0x66);
            }
            let need = needs_rex(w, false, false, b) || dst.requires_rex_for_byte();
            if need {
                buf.push(rex(w, false, false, b));
            }
            buf.push(if size == 8 { 0xD2 } else { 0xD3 });
            buf.push(modrm(0b11, digit, dst.base_code()));
        }
        // [mem], 1 — width from size hint / mem size / 32-bit default.
        (Operand::Memory(mem), Operand::Immediate(1)) => {
            let size = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            let w = size == 64;
            // REX.X/REX.B for extended index/base registers.
            let x = mem.index.is_some_and(|r| r.is_extended());
            let b = mem.base.is_some_and(|r| r.is_extended());
            if size == 16 {
                buf.push(0x66);
            }
            if needs_rex(w, false, x, b) {
                buf.push(rex(w, false, x, b));
            }
            buf.push(if size == 8 { 0xD0 } else { 0xD1 });
            emit_mem_modrm(buf, digit, mem);
        }
        // [mem], imm8 — C0/C1 /digit ib.
        (Operand::Memory(mem), Operand::Immediate(imm)) => {
            let size = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            let w = size == 64;
            let x = mem.index.is_some_and(|r| r.is_extended());
            let b = mem.base.is_some_and(|r| r.is_extended());
            if size == 16 {
                buf.push(0x66);
            }
            if needs_rex(w, false, x, b) {
                buf.push(rex(w, false, x, b));
            }
            buf.push(if size == 8 { 0xC0 } else { 0xC1 });
            emit_mem_modrm(buf, digit, mem);
            buf.push(*imm as u8);
        }
        // [mem], cl — D2/D3 /digit.
        (Operand::Memory(mem), Operand::Register(Register::Cl)) => {
            let size = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            let w = size == 64;
            let x = mem.index.is_some_and(|r| r.is_extended());
            let b = mem.base.is_some_and(|r| r.is_extended());
            if size == 16 {
                buf.push(0x66);
            }
            if needs_rex(w, false, x, b) {
                buf.push(rex(w, false, x, b));
            }
            buf.push(if size == 8 { 0xD2 } else { 0xD3 });
            emit_mem_modrm(buf, digit, mem);
        }
        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "expected r/m, imm or r/m, cl",
                instr.span,
            ))
        }
    }
    Ok(())
}
2605
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_jmp(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    reloc: &mut Option<Relocation>,
    relax: &mut Option<RelaxInfo>,
) -> Result<(), AsmError> {
    // Exactly one operand: label/expression, numeric displacement,
    // register, or memory.
    let [target] = ops else {
        return Err(invalid_operands("jmp", "expected 1 operand", instr.span));
    };
    match target {
        op @ (Operand::Label(_) | Operand::Expression(_)) => {
            let Some((label, addend)) = extract_label(op) else {
                return Err(invalid_operands("jmp", "expected label", instr.span));
            };
            // Near form E9 rel32 (5 bytes); the linker may later relax it
            // to the short form EB rel8 (2 bytes).
            buf.push(0xE9);
            let disp_at = buf.len();
            buf.extend_from_slice(&[0, 0, 0, 0]);
            *reloc = Some(Relocation {
                offset: disp_at,
                size: 4,
                label: alloc::rc::Rc::from(label),
                kind: RelocKind::X86Relative,
                addend,
                trailing_bytes: 0,
            });
            // Relaxation candidate: EB rel8.  Offered even with an addend —
            // the displacement arithmetic is identical either way.
            *relax = Some(RelaxInfo {
                short_bytes: InstrBytes::from_slice(&[0xEB, 0x00]),
                short_reloc_offset: 1,
                short_relocation: None,
            });
        }
        Operand::Immediate(disp) => {
            // Known numeric displacement — near form E9 rel32.
            buf.push(0xE9);
            buf.extend_from_slice(&(*disp as i32).to_le_bytes());
        }
        Operand::Register(reg) => {
            // Indirect through a register: FF /4.
            if reg.is_extended() {
                buf.push(rex(false, false, false, true));
            }
            buf.extend_from_slice(&[0xFF, modrm(0b11, 4, reg.base_code())]);
        }
        Operand::Memory(mem) => {
            // Indirect through memory: FF /4.  jmp defaults to 64-bit
            // operand size, so REX.W is redundant.
            emit_rex_for_digit_mem(buf, 0, mem);
            buf.push(0xFF);
            emit_mem_modrm(buf, 4, mem);
        }
        _ => return Err(invalid_operands("jmp", "unsupported operand", instr.span)),
    }
    Ok(())
}
2665
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_call(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    let [target] = ops else {
        return Err(invalid_operands("call", "expected 1 operand", instr.span));
    };
    match target {
        op @ (Operand::Label(_) | Operand::Expression(_)) => {
            // Near relative call: E8 rel32, patched via relocation.
            let Some((label, addend)) = extract_label(op) else {
                return Err(invalid_operands("call", "expected label", instr.span));
            };
            buf.push(0xE8);
            let disp_at = buf.len();
            buf.extend_from_slice(&[0, 0, 0, 0]);
            *reloc = Some(Relocation {
                offset: disp_at,
                size: 4,
                label: alloc::rc::Rc::from(label),
                kind: RelocKind::X86Relative,
                addend,
                trailing_bytes: 0,
            });
        }
        Operand::Register(reg) => {
            // Indirect call through a register: FF /2.
            if reg.is_extended() {
                buf.push(rex(false, false, false, true));
            }
            buf.extend_from_slice(&[0xFF, modrm(0b11, 2, reg.base_code())]);
        }
        Operand::Memory(mem) => {
            // Indirect call through memory: FF /2.  call defaults to
            // 64-bit operand size — REX.W is redundant.
            emit_rex_for_digit_mem(buf, 0, mem);
            buf.push(0xFF);
            emit_mem_modrm(buf, 2, mem);
        }
        _ => return Err(invalid_operands("call", "unsupported operand", instr.span)),
    }
    Ok(())
}
2711
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_jcc(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    cc: u8,
    reloc: &mut Option<Relocation>,
    relax: &mut Option<RelaxInfo>,
) -> Result<(), AsmError> {
    let [target] = ops else {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 1 operand",
            instr.span,
        ));
    };
    match target {
        op @ (Operand::Label(_) | Operand::Expression(_)) => {
            let Some((label, addend)) = extract_label(op) else {
                return Err(invalid_operands(
                    &instr.mnemonic,
                    "expected label or offset",
                    instr.span,
                ));
            };
            // Near form 0F 8x rel32 (6 bytes); the linker may relax it
            // to the short form 7x rel8 (2 bytes).
            buf.extend_from_slice(&[0x0F, 0x80 + cc]);
            let disp_at = buf.len();
            buf.extend_from_slice(&[0, 0, 0, 0]);
            *reloc = Some(Relocation {
                offset: disp_at,
                size: 4,
                label: alloc::rc::Rc::from(label),
                kind: RelocKind::X86Relative,
                addend,
                trailing_bytes: 0,
            });
            // Short-form candidate for relaxation.
            *relax = Some(RelaxInfo {
                short_bytes: InstrBytes::from_slice(&[0x70 + cc, 0x00]),
                short_reloc_offset: 1,
                short_relocation: None,
            });
        }
        Operand::Immediate(disp) => {
            // Known numeric offset — emit the near form directly.
            buf.extend_from_slice(&[0x0F, 0x80 + cc]);
            buf.extend_from_slice(&(*disp as i32).to_le_bytes());
        }
        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "expected label or offset",
                instr.span,
            ))
        }
    }
    Ok(())
}
2773
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_loop(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    opcode: u8,
    reloc: &mut Option<Relocation>,
    relax: &mut Option<RelaxInfo>,
) -> Result<(), AsmError> {
    let [target] = ops else {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 1 operand",
            instr.span,
        ));
    };
    if !matches!(target, Operand::Label(_) | Operand::Expression(_)) {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected label",
            instr.span,
        ));
    }
    let Some((label, addend)) = extract_label(target) else {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected label",
            instr.span,
        ));
    };
    // LOOPx only takes rel8, so an arbitrary target needs a 9-byte
    // trampoline the linker can relax back to LOOPx rel8 (2 bytes)
    // once the target is known to be within ±127 bytes:
    //
    //   [0] opcode  LOOPx
    //   [1] 0x02    rel8 = +2 — condition held: hop over the short JMP
    //               onto the near JMP below
    //   [2] 0xEB    JMP short
    //   [3] 0x05    rel8 = +5 — condition failed (CX exhausted): skip
    //               the near JMP entirely
    //   [4] 0xE9    JMP near
    //   [5..9]      rel32 placeholder, patched to the target label
    buf.extend_from_slice(&[opcode, 0x02, 0xEB, 0x05, 0xE9]);
    let disp_at = buf.len();
    buf.extend_from_slice(&[0, 0, 0, 0]);
    *reloc = Some(Relocation {
        offset: disp_at,
        size: 4,
        label: alloc::rc::Rc::from(label),
        kind: RelocKind::X86Relative,
        addend,
        trailing_bytes: 0,
    });
    // Two-byte short form offered for relaxation: LOOPx rel8.
    *relax = Some(RelaxInfo {
        short_bytes: InstrBytes::from_slice(&[opcode, 0x00]),
        short_reloc_offset: 1,
        short_relocation: None,
    });
    Ok(())
}
2843
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_setcc(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    cc: u8,
) -> Result<(), AsmError> {
    let [dst] = ops else {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 1 operand",
            instr.span,
        ));
    };
    match dst {
        Operand::Register(reg) => {
            if reg.size_bits() != 8 {
                return Err(invalid_operands(
                    &instr.mnemonic,
                    "SETcc requires an 8-bit register operand",
                    instr.span,
                ));
            }
            // REX is required for extended registers and for byte
            // registers that can only be encoded with one present.
            let ext = reg.is_extended();
            if ext || reg.requires_rex_for_byte() {
                buf.push(rex(false, false, false, ext));
            }
            buf.extend_from_slice(&[0x0F, 0x90 + cc, modrm(0b11, 0, reg.base_code())]);
        }
        Operand::Memory(mem) => {
            emit_rex_for_digit_mem(buf, 8, mem);
            buf.extend_from_slice(&[0x0F, 0x90 + cc]);
            emit_mem_modrm(buf, 0, mem);
        }
        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "expected register or memory",
                instr.span,
            ))
        }
    }
    Ok(())
}
2892
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_cmovcc(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    cc: u8,
) -> Result<(), AsmError> {
    let [dst_op, src_op] = ops else {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 2 operands",
            instr.span,
        ));
    };
    match (dst_op, src_op) {
        (Operand::Register(dst), Operand::Register(src)) => {
            let bits = reg_size(*dst);
            if bits == 8 {
                return Err(invalid_operands(
                    &instr.mnemonic,
                    "CMOVcc requires 16/32/64-bit operands",
                    instr.span,
                ));
            }
            if bits == 16 {
                buf.push(0x66);
            }
            let (w, r, b) = (bits == 64, dst.is_extended(), src.is_extended());
            if needs_rex(w, r, false, b) {
                buf.push(rex(w, r, false, b));
            }
            // 0F 4x /r — destination in the ModR/M reg field.
            buf.extend_from_slice(&[
                0x0F,
                0x40 + cc,
                modrm(0b11, dst.base_code(), src.base_code()),
            ]);
        }
        (Operand::Register(dst), Operand::Memory(mem)) => {
            if reg_size(*dst) == 8 {
                return Err(invalid_operands(
                    &instr.mnemonic,
                    "CMOVcc requires 16/32/64-bit operands",
                    instr.span,
                ));
            }
            emit_rex_for_reg_mem(buf, *dst, mem)?;
            buf.extend_from_slice(&[0x0F, 0x40 + cc]);
            emit_mem_modrm(buf, dst.base_code(), mem);
        }
        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "expected reg, r/m",
                instr.span,
            ))
        }
    }
    Ok(())
}
2953
/// Encode `movzx` (zero-extending move).
///
/// Valid forms per the opcode map: 0F B6 (r16/32/64 ← r/m8) and
/// 0F B7 (r32/64 ← r/m16).  Previously an out-of-range size pair
/// (e.g. `movzx r64, r32`, or an 8-bit destination) was silently
/// encoded as the wrong instruction; such pairs are now rejected.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_movzx(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands("movzx", "expected 2 operands", instr.span));
    }
    match (&ops[0], &ops[1]) {
        (Operand::Register(dst), Operand::Register(src)) => {
            let dst_size = reg_size(*dst);
            let src_size = reg_size(*src);
            // Source must be 8 or 16 bits and strictly narrower than the
            // 16/32/64-bit destination.
            if dst_size == 8 || src_size > 16 || src_size >= dst_size {
                return Err(invalid_operands(
                    "movzx",
                    "movzx requires an 8/16-bit source narrower than a 16/32/64-bit destination",
                    instr.span,
                ));
            }
            let w = dst_size == 64;
            let r = dst.is_extended();
            let b = src.is_extended();
            if dst_size == 16 {
                buf.push(0x66);
            }
            // `requires_rex_for_byte` forces REX for byte registers that
            // can only be encoded with one present.
            let need = needs_rex(w, r, false, b) || src.requires_rex_for_byte();
            if need {
                buf.push(rex(w, r, false, b));
            }
            buf.push(0x0F);
            buf.push(if src_size == 8 { 0xB6 } else { 0xB7 }); // B6=byte, B7=word
            buf.push(modrm(0b11, dst.base_code(), src.base_code()));
        }
        (Operand::Register(dst), Operand::Memory(mem)) => {
            // Source width: explicit size hint, else the memory operand's
            // annotated size, defaulting to a byte load.
            let src_size = instr
                .size_hint
                .map_or(mem.size.map_or(8u8, |s| s.bits() as u8), |s| s.bits() as u8);
            let dst_size = reg_size(*dst);
            if dst_size == 8 || src_size > 16 || src_size >= dst_size {
                return Err(invalid_operands(
                    "movzx",
                    "movzx requires an 8/16-bit source narrower than a 16/32/64-bit destination",
                    instr.span,
                ));
            }
            emit_rex_for_reg_mem(buf, *dst, mem)?;
            buf.push(0x0F);
            buf.push(if src_size == 8 { 0xB6 } else { 0xB7 });
            emit_mem_modrm(buf, dst.base_code(), mem);
        }
        _ => return Err(invalid_operands("movzx", "expected reg, r/m", instr.span)),
    }
    Ok(())
}
2994
/// Encode `movsx` / `movsxd` (sign-extending move).
///
/// Valid forms per the opcode map: 0F BE (r/m8 source), 0F BF
/// (r/m16 source, 32/64-bit destination), and 63 `movsxd` (r/m32
/// source).  Previously an 8-bit destination or a 64-bit "source"
/// was silently encoded as the wrong instruction; such pairs are
/// now rejected.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_movsx(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands("movsx", "expected 2 operands", instr.span));
    }
    match (&ops[0], &ops[1]) {
        (Operand::Register(dst), Operand::Register(src)) => {
            let dst_size = reg_size(*dst);
            let src_size = reg_size(*src);
            // 8/16-bit sources must be strictly narrower than the
            // destination; a 32-bit source is movsxd (any 16/32/64 dst).
            if dst_size == 8 || src_size == 64 || (src_size != 32 && src_size >= dst_size) {
                return Err(invalid_operands(
                    "movsx",
                    "unsupported operand sizes for movsx/movsxd",
                    instr.span,
                ));
            }
            let w = dst_size == 64;
            let r = dst.is_extended();
            let b = src.is_extended();
            if dst_size == 16 {
                buf.push(0x66);
            }
            // `requires_rex_for_byte` forces REX for byte registers that
            // can only be encoded with one present.
            let need = needs_rex(w, r, false, b) || src.requires_rex_for_byte();
            if need {
                buf.push(rex(w, r, false, b));
            }
            if src_size == 32 {
                // movsxd: 63 /r
                buf.push(0x63);
            } else {
                buf.push(0x0F);
                buf.push(if src_size == 8 { 0xBE } else { 0xBF });
            }
            buf.push(modrm(0b11, dst.base_code(), src.base_code()));
        }
        (Operand::Register(dst), Operand::Memory(mem)) => {
            // Source width: explicit size hint, else the memory operand's
            // annotated size, defaulting to a byte load.
            let src_size = instr
                .size_hint
                .map_or(mem.size.map_or(8u8, |s| s.bits() as u8), |s| s.bits() as u8);
            let dst_size = reg_size(*dst);
            if dst_size == 8 || src_size == 64 || (src_size != 32 && src_size >= dst_size) {
                return Err(invalid_operands(
                    "movsx",
                    "unsupported operand sizes for movsx/movsxd",
                    instr.span,
                ));
            }
            if src_size == 32 {
                // movsxd: REX.W 63 /r
                let w = dst_size == 64;
                let r = dst.is_extended();
                let x = mem.index.is_some_and(|r| r.is_extended());
                let b = mem.base.is_some_and(|r| r.is_extended());
                if dst_size == 16 {
                    buf.push(0x66);
                }
                if needs_rex(w, r, x, b) {
                    buf.push(rex(w, r, x, b));
                }
                buf.push(0x63);
            } else {
                emit_rex_for_reg_mem(buf, *dst, mem)?;
                buf.push(0x0F);
                buf.push(if src_size == 8 { 0xBE } else { 0xBF });
            }
            emit_mem_modrm(buf, dst.base_code(), mem);
        }
        _ => return Err(invalid_operands("movsx", "expected reg, r/m", instr.span)),
    }
    Ok(())
}
3056
/// Encode `xchg`.
///
/// Uses the compact 90+rd accumulator forms where they are
/// semantically safe, otherwise 86/87 /r.  `xchg eax, eax` must NOT
/// use the shortcut: a lone 0x90 is NOP and, unlike a real 32-bit
/// xchg (87 C0), does not zero-extend the upper half of rax in
/// 64-bit mode — GAS and NASM emit 87 C0 for the same reason.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_xchg(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands("xchg", "expected 2 operands", instr.span));
    }
    // True when the register is the non-extended accumulator
    // (al/ax/eax/rax — encoding 0 without REX.B).
    let is_acc = |r: Register| r.base_code() == 0 && !r.is_extended();
    match (&ops[0], &ops[1]) {
        (Operand::Register(dst), Operand::Register(src)) => {
            let size = reg_size(*dst);
            if size >= 16 && (is_acc(*dst) || is_acc(*src)) {
                let other = if is_acc(*dst) { *src } else { *dst };
                // Skip the shortcut for `xchg eax, eax` (see doc above);
                // fall through to the 87 /r form for that one case.
                if !(size == 32 && is_acc(other)) {
                    if size == 16 {
                        // xchg ax, r16 → 66 90+rd
                        buf.push(0x66);
                        if other.is_extended() {
                            buf.push(rex(false, false, false, true));
                        }
                    } else {
                        // xchg eax/rax, reg → 90+rd (REX.W for 64-bit)
                        let w = size == 64;
                        let b = other.is_extended();
                        if needs_rex(w, false, false, b) {
                            buf.push(rex(w, false, false, b));
                        }
                    }
                    buf.push(0x90 + other.base_code());
                    return Ok(());
                }
            }
            let opcode = if size == 8 { 0x86u8 } else { 0x87u8 };
            emit_rr(buf, &[opcode], *dst, *src, instr.span)?;
        }
        // xchg is symmetric: reg, [mem] and [mem], reg encode identically.
        (Operand::Register(reg), Operand::Memory(mem))
        | (Operand::Memory(mem), Operand::Register(reg)) => {
            let opcode = if reg_size(*reg) == 8 { 0x86u8 } else { 0x87u8 };
            emit_rex_for_reg_mem(buf, *reg, mem)?;
            buf.push(opcode);
            emit_mem_modrm(buf, reg.base_code(), mem);
        }
        _ => {
            return Err(invalid_operands(
                "xchg",
                "unsupported operand combination",
                instr.span,
            ))
        }
    }
    Ok(())
}
3134
/// Encode the BT/BTS/BTR/BTC bit-test family.
///
/// `digit` is the /digit value for the immediate forms (0F BA
/// /4=BT, /5=BTS, /6=BTR, /7=BTC) and also selects the two-byte
/// opcode for the register-source forms (0F A3/AB/B3/BB).
///
/// Supported forms: r, imm8 · r, r · m, r · m, imm8.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_bt(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    digit: u8,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 2 operands",
            instr.span,
        ));
    }
    // BT family only supports 16/32/64-bit operands
    if let Operand::Register(r) = &ops[0] {
        if reg_size(*r) == 8 {
            return Err(invalid_operands(
                &instr.mnemonic,
                "8-bit operands not supported",
                instr.span,
            ));
        }
    }
    match (&ops[0], &ops[1]) {
        // r, imm8 → 0F BA /digit ib
        (Operand::Register(dst), Operand::Immediate(imm)) => {
            let size = reg_size(*dst);
            let w = size == 64;
            let b = dst.is_extended();
            if size == 16 {
                buf.push(0x66);
            }
            if needs_rex(w, false, false, b) {
                buf.push(rex(w, false, false, b));
            }
            buf.push(0x0F);
            buf.push(0xBA);
            buf.push(modrm(0b11, digit, dst.base_code()));
            buf.push(*imm as u8);
        }
        // r, r → 0F A3/AB/B3/BB — the *source* (bit index) goes in the
        // ModR/M reg field, the destination in the r/m field.
        (Operand::Register(dst), Operand::Register(src)) => {
            let size = reg_size(*dst);
            let w = size == 64;
            let r = src.is_extended();
            let b = dst.is_extended();
            if size == 16 {
                buf.push(0x66);
            }
            if needs_rex(w, r, false, b) {
                buf.push(rex(w, r, false, b));
            }
            buf.push(0x0F);
            let base = match digit {
                4 => 0xA3, // BT
                5 => 0xAB, // BTS
                6 => 0xB3, // BTR
                7 => 0xBB, // BTC
                _ => 0xA3,
            };
            buf.push(base);
            buf.push(modrm(0b11, src.base_code(), dst.base_code()));
        }
        // m, r → operand size from the memory operand's annotation when
        // present, else from the source register.
        (Operand::Memory(mem), Operand::Register(src)) => {
            let size = mem.size.map_or(reg_size(*src), |s| s.bits() as u8);
            let w = size == 64;
            let r = src.is_extended();
            let x = mem.index.is_some_and(|r| r.is_extended());
            let b = mem.base.is_some_and(|r| r.is_extended());
            if size == 16 {
                buf.push(0x66);
            }
            if needs_rex(w, r, x, b) {
                buf.push(rex(w, r, x, b));
            }
            buf.push(0x0F);
            let base = match digit {
                4 => 0xA3,
                5 => 0xAB,
                6 => 0xB3,
                7 => 0xBB,
                _ => 0xA3,
            };
            buf.push(base);
            emit_mem_modrm(buf, src.base_code(), mem);
        }
        // m, imm8 → 0F BA /digit ib — operand size from the instruction's
        // size hint, then the memory operand's size, defaulting to 32.
        (Operand::Memory(mem), Operand::Immediate(imm)) => {
            let size = instr
                .size_hint
                .map_or(mem.size.map_or(32u8, |s| s.bits() as u8), |s| {
                    s.bits() as u8
                });
            let w = size == 64;
            let x = mem.index.is_some_and(|r| r.is_extended());
            let b = mem.base.is_some_and(|r| r.is_extended());
            if size == 16 {
                buf.push(0x66);
            }
            if needs_rex(w, false, x, b) {
                buf.push(rex(w, false, x, b));
            }
            buf.push(0x0F);
            buf.push(0xBA);
            emit_mem_modrm(buf, digit, mem);
            buf.push(*imm as u8);
        }
        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "unsupported operand combination",
                instr.span,
            ))
        }
    }
    Ok(())
}
3250
/// Encode BSF/BSR (bit scan): 0F BC / 0F BD, selected by `opcode2`.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_bsf_bsr(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    opcode2: u8,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands(
            &instr.mnemonic,
            "expected 2 operands",
            instr.span,
        ));
    }
    // Bit-scan instructions have no 8-bit form.
    if let Operand::Register(r) = &ops[0] {
        if reg_size(*r) == 8 {
            return Err(invalid_operands(
                &instr.mnemonic,
                "8-bit operands not supported",
                instr.span,
            ));
        }
    }
    match (&ops[0], &ops[1]) {
        (Operand::Register(dst), Operand::Register(src)) => {
            let bits = reg_size(*dst);
            if bits == 16 {
                buf.push(0x66);
            }
            let (w, r, b) = (bits == 64, dst.is_extended(), src.is_extended());
            if needs_rex(w, r, false, b) {
                buf.push(rex(w, r, false, b));
            }
            buf.extend_from_slice(&[
                0x0F,
                opcode2,
                modrm(0b11, dst.base_code(), src.base_code()),
            ]);
        }
        (Operand::Register(dst), Operand::Memory(mem)) => {
            emit_rex_for_reg_mem(buf, *dst, mem)?;
            buf.extend_from_slice(&[0x0F, opcode2]);
            emit_mem_modrm(buf, dst.base_code(), mem);
        }
        _ => {
            return Err(invalid_operands(
                &instr.mnemonic,
                "expected reg, r/m",
                instr.span,
            ))
        }
    }
    Ok(())
}
3307
/// Shared encoder for the F3 0F xx reg, r/m family (popcnt, lzcnt, tzcnt).
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn encode_f3_0f_rm(
    buf: &mut InstrBytes,
    ops: &[Operand],
    mnemonic: &str,
    opcode: u8,
    span: Span,
) -> Result<(), AsmError> {
    if ops.len() != 2 {
        return Err(invalid_operands(mnemonic, "expected 2 operands", span));
    }
    // No 8-bit forms exist for this family.
    if let Operand::Register(r) = &ops[0] {
        if reg_size(*r) == 8 {
            return Err(invalid_operands(
                mnemonic,
                "8-bit operands not supported",
                span,
            ));
        }
    }
    match (&ops[0], &ops[1]) {
        (Operand::Register(dst), Operand::Register(src)) => {
            let bits = reg_size(*dst);
            buf.push(0xF3); // mandatory prefix
            if bits == 16 {
                buf.push(0x66);
            }
            let (w, r, b) = (bits == 64, dst.is_extended(), src.is_extended());
            if needs_rex(w, r, false, b) {
                buf.push(rex(w, r, false, b));
            }
            buf.extend_from_slice(&[
                0x0F,
                opcode,
                modrm(0b11, dst.base_code(), src.base_code()),
            ]);
        }
        (Operand::Register(dst), Operand::Memory(mem)) => {
            buf.push(0xF3); // mandatory prefix
            emit_rex_for_reg_mem(buf, *dst, mem)?;
            buf.extend_from_slice(&[0x0F, opcode]);
            emit_mem_modrm(buf, dst.base_code(), mem);
        }
        _ => return Err(invalid_operands(mnemonic, "expected reg, r/m", span)),
    }
    Ok(())
}
3358
/// `popcnt` — population count: F3 0F B8 /r.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_popcnt(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    encode_f3_0f_rm(buf, ops, "popcnt", 0xB8, instr.span)
}
3367
/// `lzcnt` — count leading zeros: F3 0F BD /r.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_lzcnt(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    encode_f3_0f_rm(buf, ops, "lzcnt", 0xBD, instr.span)
}
3376
/// `tzcnt` — count trailing zeros: F3 0F BC /r.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_tzcnt(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    encode_f3_0f_rm(buf, ops, "tzcnt", 0xBC, instr.span)
}
3385
/// Encode `bswap` (byte-order reversal): 0F C8+rd.
///
/// Only 32/64-bit registers are accepted; the 16-bit form is
/// rejected because its result is undefined.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_bswap(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    if ops.len() != 1 {
        return Err(invalid_operands("bswap", "expected 1 operand", instr.span));
    }
    let Operand::Register(reg) = &ops[0] else {
        return Err(invalid_operands("bswap", "expected register", instr.span));
    };
    match reg_size(*reg) {
        8 => Err(invalid_operands(
            "bswap",
            "8-bit operands not supported",
            instr.span,
        )),
        16 => Err(invalid_operands(
            "bswap",
            "16-bit bswap has undefined behavior; use xchg or rol instead",
            instr.span,
        )),
        size => {
            // 0F C8+rd, with REX.W selecting the 64-bit form.
            let w = size == 64;
            let b = reg.is_extended();
            if needs_rex(w, false, false, b) {
                buf.push(rex(w, false, false, b));
            }
            buf.extend_from_slice(&[0x0F, 0xC8 + reg.base_code()]);
            Ok(())
        }
    }
}
3424
/// Write `imm` into `buf` as a little-endian immediate of `size` bits.
///
/// # Panics
///
/// Panics if `size` is not one of 8, 16, 32, or 64.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn emit_imm(buf: &mut InstrBytes, imm: i128, size: u8) {
    // Truncating casts are intentional: the parser has already range-checked
    // the immediate for the chosen operand size.
    if size == 8 {
        buf.push(imm as u8);
    } else if size == 16 {
        buf.extend_from_slice(&(imm as u16).to_le_bytes());
    } else if size == 32 {
        buf.extend_from_slice(&(imm as u32).to_le_bytes());
    } else if size == 64 {
        buf.extend_from_slice(&(imm as u64).to_le_bytes());
    } else {
        panic!("emit_imm: unsupported immediate size {size} (expected 8, 16, 32, or 64)");
    }
}
3440
/// Build an `AsmError::InvalidOperands` carrying `detail` at `span`.
/// The mnemonic argument is currently unused but kept for uniform call sites.
#[inline]
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn invalid_operands(_mnemonic: &str, detail: &str, span: Span) -> AsmError {
    AsmError::InvalidOperands {
        detail: detail.to_string(),
        span,
    }
}
3449
3450// ─── SSE / XMM encoder helpers ────────────────────────────────
3451
/// Emit the REX prefix (when needed) for a register,register SSE form:
/// `reg` lands in ModR/M.reg, `rm` in ModR/M.rm.
/// `w` selects REX.W (64-bit movd/movq forms).
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_rex_sse_rr(buf: &mut InstrBytes, w: bool, reg: Register, rm: Register) {
    let (r, b) = (reg.is_extended(), rm.is_extended());
    if needs_rex(w, r, false, b) {
        buf.push(rex(w, r, false, b));
    }
}
3462
/// Emit the REX prefix (when needed) for a register + memory SSE form.
/// `w` selects REX.W; X/B come from extended SIB index/base registers.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_rex_sse_rm(buf: &mut InstrBytes, w: bool, reg: Register, mem: &MemoryOperand) {
    let r = reg.is_extended();
    let x = mem.index.is_some_and(|idx| idx.is_extended());
    let b = mem.base.is_some_and(|base| base.is_extended());
    if needs_rex(w, r, x, b) {
        buf.push(rex(w, r, x, b));
    }
}
3474
/// Encode an SSE instruction in register,register form.
///
/// Byte layout: `[mandatory_prefix] [REX] opcode_bytes ModR/M(11, dst, src)`.
///
/// `opcode` is the full opcode slice (e.g. `&[0x0F, 0x58]` for ADDPS).
/// `mandatory_prefix` is 0 (none), 0x66, 0xF3, or 0xF2.
/// `rex_w` forces REX.W (needed for 64-bit movd/movq).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_sse_rr(
    buf: &mut InstrBytes,
    mandatory_prefix: u8,
    opcode: &[u8],
    dst: Register,
    src: Register,
    rex_w: bool,
) {
    // The mandatory prefix (if any) must precede REX.
    if mandatory_prefix > 0 {
        buf.push(mandatory_prefix);
    }
    emit_rex_sse_rr(buf, rex_w, dst, src);
    for &byte in opcode {
        buf.push(byte);
    }
    buf.push(modrm(0b11, dst.base_code(), src.base_code()));
}
3498
/// Encode SSE xmm, mem (load direction):
/// `[prefix] [REX] opcode ModR/M [SIB] [disp]`.
/// Records a relocation for symbolic displacements via `set_mem_reloc`.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_sse_rm(
    buf: &mut InstrBytes,
    mandatory_prefix: u8,
    opcode: &[u8],
    reg: Register,
    mem: &MemoryOperand,
    reloc: &mut Option<Relocation>,
    rex_w: bool,
) {
    if mandatory_prefix > 0 {
        buf.push(mandatory_prefix);
    }
    emit_rex_sse_rm(buf, rex_w, reg, mem);
    for &byte in opcode {
        buf.push(byte);
    }
    // Remember where the displacement landed so the relocation can patch it.
    let disp_off = emit_mem_modrm(buf, reg.base_code(), mem);
    set_mem_reloc(reloc, mem, disp_off, buf.len());
}
3518
/// Encode SSE mem, xmm (store direction).
///
/// Byte-for-byte the same layout as the load form: the register always
/// occupies ModR/M.reg and the memory operand ModR/M.rm — only the data
/// direction (encoded in the opcode itself) differs.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_sse_mr(
    buf: &mut InstrBytes,
    mandatory_prefix: u8,
    opcode: &[u8],
    mem: &MemoryOperand,
    reg: Register,
    reloc: &mut Option<Relocation>,
    rex_w: bool,
) {
    encode_sse_rm(buf, mandatory_prefix, opcode, reg, mem, reloc, rex_w)
}
3534
/// Generic SSE two-operand encoder: xmm, xmm/m or m, xmm.
///
/// `load_opcode` / `store_opcode` are the full opcode byte sequences; when
/// no dedicated store opcode exists the load opcode is reused for the
/// memory-destination form.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_sse_op(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    mandatory_prefix: u8,
    load_opcode: &[u8],
    store_opcode: Option<&[u8]>,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    match ops {
        // xmm, xmm
        [Operand::Register(dst), Operand::Register(src), ..]
            if dst.is_xmm() && src.is_xmm() =>
        {
            encode_sse_rr(buf, mandatory_prefix, load_opcode, *dst, *src, false);
        }
        // xmm, mem (load)
        [Operand::Register(dst), Operand::Memory(mem), ..] if dst.is_xmm() => {
            encode_sse_rm(buf, mandatory_prefix, load_opcode, *dst, mem, reloc, false);
        }
        // mem, xmm (store)
        [Operand::Memory(mem), Operand::Register(src), ..] if src.is_xmm() => {
            let opc = store_opcode.unwrap_or(load_opcode);
            encode_sse_mr(buf, mandatory_prefix, opc, mem, *src, reloc, false);
        }
        _ => {
            return Err(invalid_operands(
                instr.mnemonic.as_str(),
                "expected xmm,xmm/m or m,xmm operands",
                instr.span,
            ));
        }
    }
    Ok(())
}
3575
/// SSE instruction with a trailing immediate byte: xmm, xmm/m, imm8.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_sse_imm(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    mandatory_prefix: u8,
    opcode: &[u8],
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    // Single error constructor shared by all rejection paths.
    let bad = || {
        invalid_operands(
            instr.mnemonic.as_str(),
            "expected xmm, xmm/m, imm8",
            instr.span,
        )
    };
    let Some(Operand::Immediate(imm)) = ops.get(2) else {
        return Err(bad());
    };
    let imm = *imm;
    match (ops.first(), ops.get(1)) {
        (Some(Operand::Register(dst)), Some(Operand::Register(src)))
            if dst.is_xmm() && src.is_xmm() =>
        {
            encode_sse_rr(buf, mandatory_prefix, opcode, *dst, *src, false);
        }
        (Some(Operand::Register(dst)), Some(Operand::Memory(mem))) if dst.is_xmm() => {
            encode_sse_rm(buf, mandatory_prefix, opcode, *dst, mem, reloc, false);
        }
        _ => return Err(bad()),
    }
    // The imm8 always trails the ModR/M (and any SIB/displacement) bytes.
    buf.push(imm as u8);
    Ok(())
}
3616
/// Encode MOVD/MOVQ — GP ↔ XMM transfers.
///
/// Arm order matters: the GP↔XMM forms are guarded with `!is_xmm()` so the
/// xmm,xmm `movq` form (F3 0F 7E) is only reached when both operands are XMM.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_movd_movq(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    reloc: &mut Option<Relocation>,
    is_movq: bool,
) -> Result<(), AsmError> {
    match (ops.first(), ops.get(1)) {
        // movd/movq xmm, r/m32/64 — load: 66 [REX.W] 0F 6E /r
        (Some(Operand::Register(dst)), Some(Operand::Register(src)))
            if dst.is_xmm() && !src.is_xmm() =>
        {
            // REX.W when the mnemonic is movq or the GP source is 64-bit.
            let w = is_movq || src.size_bits() == 64;
            encode_sse_rr(buf, 0x66, &[0x0F, 0x6E], *dst, *src, w);
            Ok(())
        }
        // movd/movq xmm, [mem] — width comes solely from the mnemonic.
        (Some(Operand::Register(dst)), Some(Operand::Memory(mem))) if dst.is_xmm() => {
            let w = is_movq;
            encode_sse_rm(buf, 0x66, &[0x0F, 0x6E], *dst, mem, reloc, w);
            Ok(())
        }
        // movd/movq r/m32/64, xmm — store: 66 [REX.W] 0F 7E /r
        (Some(Operand::Register(dst)), Some(Operand::Register(src)))
            if !dst.is_xmm() && src.is_xmm() =>
        {
            let w = is_movq || dst.size_bits() == 64;
            // Note: in the store form, the XMM reg is the source but goes in ModR/M.reg
            encode_sse_rr(buf, 0x66, &[0x0F, 0x7E], *src, *dst, w);
            Ok(())
        }
        // movd/movq [mem], xmm — memory store.
        (Some(Operand::Memory(mem)), Some(Operand::Register(src))) if src.is_xmm() => {
            let w = is_movq;
            encode_sse_mr(buf, 0x66, &[0x0F, 0x7E], mem, *src, reloc, w);
            Ok(())
        }
        // movq xmm, xmm — use F3 0F 7E /r
        // NOTE(review): this arm ignores `is_movq`, so `movd xmm, xmm` would
        // also encode as movq — confirm that's the intended fallback.
        (Some(Operand::Register(dst)), Some(Operand::Register(src)))
            if dst.is_xmm() && src.is_xmm() =>
        {
            encode_sse_rr(buf, 0xF3, &[0x0F, 0x7E], *dst, *src, false);
            Ok(())
        }
        _ => Err(invalid_operands(
            instr.mnemonic.as_str(),
            "expected xmm,r/m or r/m,xmm operands",
            instr.span,
        )),
    }
}
3668
/// Encode CVTSI2SS / CVTSI2SD: xmm, r/m32/64 — `[F3|F2] [REX.W] 0F 2A /r`.
/// REX.W follows the GP source width (register size, or the memory
/// operand's declared size).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_cvtsi2(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    mandatory_prefix: u8,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    let opcode = [0x0F, 0x2A];
    match ops {
        [Operand::Register(dst), Operand::Register(src), ..]
            if dst.is_xmm() && !src.is_xmm() =>
        {
            let w = src.size_bits() == 64;
            encode_sse_rr(buf, mandatory_prefix, &opcode, *dst, *src, w);
            Ok(())
        }
        [Operand::Register(dst), Operand::Memory(mem), ..] if dst.is_xmm() => {
            let w = mem.size == Some(OperandSize::Qword);
            encode_sse_rm(buf, mandatory_prefix, &opcode, *dst, mem, reloc, w);
            Ok(())
        }
        _ => Err(invalid_operands(
            instr.mnemonic.as_str(),
            "expected xmm, r/m32 or xmm, r/m64",
            instr.span,
        )),
    }
}
3698
/// Encode CVTSS2SI / CVTSD2SI / CVTTSS2SI / CVTTSD2SI: r32/64, xmm/m.
/// REX.W follows the GP destination width.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_cvt2si(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    mandatory_prefix: u8,
    opcode2: u8,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    let opcode = [0x0F, opcode2];
    match ops {
        [Operand::Register(dst), Operand::Register(src), ..]
            if !dst.is_xmm() && src.is_xmm() =>
        {
            let w = dst.size_bits() == 64;
            encode_sse_rr(buf, mandatory_prefix, &opcode, *dst, *src, w);
            Ok(())
        }
        [Operand::Register(dst), Operand::Memory(mem), ..] if !dst.is_xmm() => {
            let w = dst.size_bits() == 64;
            encode_sse_rm(buf, mandatory_prefix, &opcode, *dst, mem, reloc, w);
            Ok(())
        }
        _ => Err(invalid_operands(
            instr.mnemonic.as_str(),
            "expected r32/r64, xmm/m operands",
            instr.span,
        )),
    }
}
3729
/// Encode the prefetch family: `0F 18 /digit` (memory-only).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_prefetch(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    digit: u8,
) -> Result<(), AsmError> {
    let Some(Operand::Memory(mem)) = ops.first() else {
        return Err(invalid_operands(
            instr.mnemonic.as_str(),
            "expected memory operand",
            instr.span,
        ));
    };
    // No mandatory prefix and never REX.W; REX appears only for extended
    // index/base registers.
    let x = mem.index.is_some_and(|r| r.is_extended());
    let b = mem.base.is_some_and(|r| r.is_extended());
    if needs_rex(false, false, x, b) {
        buf.push(rex(false, false, x, b));
    }
    buf.push(0x0F);
    buf.push(0x18);
    emit_mem_modrm(buf, digit, mem);
    Ok(())
}
3757
/// Encode CLFLUSH: `0F AE /7` (memory-only).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_clflush(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    let Some(Operand::Memory(mem)) = ops.first() else {
        return Err(invalid_operands(
            "clflush",
            "expected memory operand",
            instr.span,
        ));
    };
    // REX only when extended index/base registers require it.
    let x = mem.index.is_some_and(|r| r.is_extended());
    let b = mem.base.is_some_and(|r| r.is_extended());
    if needs_rex(false, false, x, b) {
        buf.push(rex(false, false, x, b));
    }
    buf.push(0x0F);
    buf.push(0xAE);
    emit_mem_modrm(buf, 7, mem);
    Ok(())
}
3783
/// Encode CLFLUSHOPT: `66 0F AE /7` (memory-only).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_clflushopt(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    let Some(Operand::Memory(mem)) = ops.first() else {
        return Err(invalid_operands(
            "clflushopt",
            "expected memory operand",
            instr.span,
        ));
    };
    // 0x66 is the mandatory prefix distinguishing this from CLFLUSH.
    buf.push(0x66);
    let x = mem.index.is_some_and(|r| r.is_extended());
    let b = mem.base.is_some_and(|r| r.is_extended());
    if needs_rex(false, false, x, b) {
        buf.push(rex(false, false, x, b));
    }
    buf.push(0x0F);
    buf.push(0xAE);
    emit_mem_modrm(buf, 7, mem);
    Ok(())
}
3810
/// Encode CLWB: `66 0F AE /6` (memory-only).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_clwb(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    let Some(Operand::Memory(mem)) = ops.first() else {
        return Err(invalid_operands(
            "clwb",
            "expected memory operand",
            instr.span,
        ));
    };
    buf.push(0x66);
    let x = mem.index.is_some_and(|r| r.is_extended());
    let b = mem.base.is_some_and(|r| r.is_extended());
    if needs_rex(false, false, x, b) {
        buf.push(rex(false, false, x, b));
    }
    buf.push(0x0F);
    buf.push(0xAE);
    // /6 distinguishes CLWB from CLFLUSHOPT's /7 in the same opcode space.
    emit_mem_modrm(buf, 6, mem);
    Ok(())
}
3837
/// Encode PREFETCHW: `0F 0D /1` (memory-only).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_prefetchw(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    let Some(Operand::Memory(mem)) = ops.first() else {
        return Err(invalid_operands(
            "prefetchw",
            "expected memory operand",
            instr.span,
        ));
    };
    let x = mem.index.is_some_and(|r| r.is_extended());
    let b = mem.base.is_some_and(|r| r.is_extended());
    if needs_rex(false, false, x, b) {
        buf.push(rex(false, false, x, b));
    }
    buf.push(0x0F);
    buf.push(0x0D);
    emit_mem_modrm(buf, 1, mem);
    Ok(())
}
3863
/// Encode CRC32: F2 [66] [REX.W] 0F 38 F0/F1 /r.
///
/// Opcode F0 is the 8-bit-source form; F1 covers 16/32/64-bit sources
/// (the 16-bit source adds the 0x66 operand-size prefix). REX.W follows
/// the destination register width.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_crc32(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
) -> Result<(), AsmError> {
    match (ops.first(), ops.get(1)) {
        (Some(Operand::Register(dst)), Some(Operand::Register(src))) => {
            let dst_s = dst.size_bits();
            let src_s = src.size_bits();
            if dst_s != 32 && dst_s != 64 {
                return Err(invalid_operands(
                    "crc32",
                    "destination must be r32 or r64",
                    instr.span,
                ));
            }
            // NOTE(review): F2 is pushed before the optional 0x66 here; the
            // SDM lists 66 F2 0F 38 F1 — decoders generally accept either
            // prefix order, but confirm against the reference assembler.
            buf.push(0xF2);
            let w = dst_s == 64;
            let opcode2 = if src_s == 8 { 0xF0u8 } else { 0xF1 };
            if src_s == 16 {
                buf.push(0x66);
            }
            emit_rex_sse_rr(buf, w, *dst, *src);
            buf.extend_from_slice(&[0x0F, 0x38, opcode2]);
            buf.push(modrm(0b11, dst.base_code(), src.base_code()));
            Ok(())
        }
        (Some(Operand::Register(dst)), Some(Operand::Memory(mem))) => {
            let dst_s = dst.size_bits();
            if dst_s != 32 && dst_s != 64 {
                return Err(invalid_operands(
                    "crc32",
                    "destination must be r32 or r64",
                    instr.span,
                ));
            }
            // Source width: explicit instruction size hint wins, then the
            // memory operand's declared size, defaulting to 32 bits.
            let src_s = instr.size_hint.map_or_else(
                || mem.size.map_or(32u8, |s| s.bits() as u8),
                |s| s.bits() as u8,
            );
            buf.push(0xF2);
            let w = dst_s == 64;
            let opcode2 = if src_s == 8 { 0xF0u8 } else { 0xF1 };
            if src_s == 16 {
                buf.push(0x66);
            }
            emit_rex_sse_rm(buf, w, *dst, mem);
            buf.extend_from_slice(&[0x0F, 0x38, opcode2]);
            // NOTE(review): no relocation is recorded for symbolic
            // displacements here, unlike the SSE memory encoders — confirm
            // crc32 memory operands never carry symbols.
            emit_mem_modrm(buf, dst.base_code(), mem);
            Ok(())
        }
        _ => Err(invalid_operands(
            "crc32",
            "expected r32/r64, r/m operands",
            instr.span,
        )),
    }
}
3924
3925// ─── VEX / EVEX prefix encoding infrastructure ──────────────────────────────
3926//
3927// These functions implement VEX and EVEX prefix encoding for SSE/AVX instructions.
3928// They are not yet wired into the text-assembly path but are fully tested and
3929// ready for use once the mnemonic dispatch tables include VEX-encoded mnemonics.
3930
3931/// VEX prefix "pp" field (implied mandatory prefix).
3932///   0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2
3933#[cfg(any(feature = "x86", feature = "x86_64"))]
fn vex_pp(mandatory_prefix: u8) -> u8 {
    // Map a legacy mandatory-prefix byte to the 2-bit VEX.pp field:
    // none -> 00, 66 -> 01, F3 -> 10, F2 -> 11.
    if mandatory_prefix == 0x66 {
        0b01
    } else if mandatory_prefix == 0xF3 {
        0b10
    } else if mandatory_prefix == 0xF2 {
        0b11
    } else {
        // 0x00 and any unrecognized byte both mean "no implied prefix".
        0b00
    }
}
3943
3944/// VEX "m-mmmm" field (implied escape bytes).
3945///   1 = 0F, 2 = 0F 38, 3 = 0F 3A
3946#[cfg(any(feature = "x86", feature = "x86_64"))]
fn vex_mmmmm(escape: &[u8]) -> u8 {
    // Map the legacy escape byte sequence to the 5-bit VEX.m-mmmm field:
    // 0F -> 1, 0F 38 -> 2, 0F 3A -> 3.
    match escape {
        [0x0F, 0x38] => 0b00010,
        [0x0F, 0x3A] => 0b00011,
        // [0x0F] and any unrecognized escape fall back to the 0F map.
        _ => 0b00001,
    }
}
3955
/// Emit a 2-byte VEX prefix: C5 [R vvvv L pp].
/// - `r`: ModR/M.reg register is extended (stored inverted: bit clear when true)
/// - `vvvv`: NDS source register number (stored inverted; 0b1111 = unused)
/// - `l`: vector length (false = 128-bit, true = 256-bit)
/// - `pp`: implied mandatory prefix bits
///
/// The 2-byte form is only legal when m-mmmm == 0b00001 (0F escape),
/// W == 0, and neither X nor B extension is needed; `emit_vex_prefix`
/// enforces those conditions before calling this.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_vex2(buf: &mut InstrBytes, r: bool, vvvv: u8, l: bool, pp: u8) {
    let mut payload = (pp & 0x03) | (((!vvvv) & 0x0F) << 3);
    if !r {
        payload |= 0x80;
    }
    if l {
        payload |= 0x04;
    }
    buf.push(0xC5);
    buf.push(payload);
}
3975
/// Emit a 3-byte VEX prefix: C4 [R X B mmmmm] [W vvvv L pp].
/// R/X/B and vvvv are stored inverted per the VEX spec: a `false`
/// extension flag sets the corresponding bit.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_vex3(
    buf: &mut InstrBytes,
    r: bool,
    x: bool,
    b: bool,
    mmmmm: u8,
    w: bool,
    vvvv: u8,
    l: bool,
    pp: u8,
) {
    let mut byte1 = mmmmm & 0x1F;
    if !r {
        byte1 |= 0x80;
    }
    if !x {
        byte1 |= 0x40;
    }
    if !b {
        byte1 |= 0x20;
    }
    let mut byte2 = (pp & 0x03) | (((!vvvv) & 0x0F) << 3);
    if w {
        byte2 |= 0x80;
    }
    if l {
        byte2 |= 0x04;
    }
    buf.push(0xC4);
    buf.push(byte1);
    buf.push(byte2);
}
4001
/// Choose and emit the most compact VEX prefix (2-byte if possible, else 3-byte).
///
/// Parameters:
/// - `reg_extended`: the ModR/M.reg register is extended (XMM8-15 / R8-R15)
/// - `x_extended`: the SIB index register is extended
/// - `rm_extended`: the R/M or base register is extended
/// - `w`: REX.W equivalent
/// - `vvvv`: the VEX.vvvv register number (NDS/NDD source), 0 if unused
/// - `l`: vector length (false = 128, true = 256)
/// - `pp`: implied mandatory prefix (already mapped via `vex_pp`)
/// - `escape`: escape byte sequence (e.g. &[0x0F] or &[0x0F, 0x38])
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_vex_prefix(
    buf: &mut InstrBytes,
    reg_extended: bool,
    x_extended: bool,
    rm_extended: bool,
    w: bool,
    vvvv: u8,
    l: bool,
    pp: u8,
    escape: &[u8],
) {
    let mmmmm = vex_mmmmm(escape);
    // The short C5 form exists only for the 0F escape with W=0 and no X/B
    // extension bits.
    let compact = mmmmm == 0b00001 && !w && !x_extended && !rm_extended;
    if compact {
        emit_vex2(buf, reg_extended, vvvv, l, pp);
        return;
    }
    emit_vex3(
        buf,
        reg_extended,
        x_extended,
        rm_extended,
        mmmmm,
        w,
        vvvv,
        l,
        pp,
    );
}
4044
/// Encode a VEX instruction with three register operands (non-destructive
/// NDS form, e.g. `vaddps xmm0, xmm1, xmm2`): `dst` lands in ModR/M.reg,
/// `src1` in VEX.vvvv, and `src2` in ModR/M.rm.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_rrr(
    buf: &mut InstrBytes,
    pp: u8,
    escape: &[u8],
    opcode: u8,
    dst: Register,
    src1: Register,
    src2: Register,
    w: bool,
    l: bool,
) {
    // vvvv carries the full 4-bit register number (base code + extension bit).
    let vvvv = src1.base_code() | if src1.is_extended() { 8 } else { 0 };
    emit_vex_prefix(
        buf,
        dst.is_extended(),
        false,
        src2.is_extended(),
        w,
        vvvv,
        l,
        vex_pp(pp),
        escape,
    );
    buf.push(opcode);
    buf.push(modrm(0b11, dst.base_code(), src2.base_code()));
}
4074
/// Encode VEX reg, [mem] (3-operand: dst = ModR/M.reg, src1 = VEX.vvvv,
/// src2 = memory operand).
///
/// Fix: the relocation offset was previously hard-coded as `buf.len() - 4`,
/// which is only correct when the addressing mode emitted a 4-byte
/// displacement (wrong for no-disp and disp8 forms, and it fired even for
/// non-symbolic operands). Use the displacement offset returned by
/// `emit_mem_modrm` and `set_mem_reloc`, exactly like `encode_sse_rm`.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_rrm(
    buf: &mut InstrBytes,
    pp: u8,
    escape: &[u8],
    opcode: u8,
    dst: Register,
    src1: Register,
    mem: &MemoryOperand,
    reloc: &mut Option<Relocation>,
    w: bool,
    l: bool,
) {
    let mandatory_pp = vex_pp(pp);
    let x_ext = mem.index.is_some_and(|r| r.is_extended());
    let b_ext = mem.base.is_some_and(|r| r.is_extended());
    emit_vex_prefix(
        buf,
        dst.is_extended(),
        x_ext,
        b_ext,
        w,
        src1.base_code() | if src1.is_extended() { 8 } else { 0 },
        l,
        mandatory_pp,
        escape,
    );
    buf.push(opcode);
    let disp_off = emit_mem_modrm(buf, dst.base_code(), mem);
    set_mem_reloc(reloc, mem, disp_off, buf.len());
}
4109
/// Generic VEX-encoded SSE/AVX instruction dispatcher.
/// Handles the common patterns:
///   - vop xmm/ymm, xmm/ymm, xmm/ymm  (3 register operands, NDS form)
///   - vop xmm/ymm, xmm/ymm, [mem]     (reg, reg, mem)
///   - vop xmm/ymm, xmm/ymm            (2 operands => vvvv unused, move-like)
///   - vop xmm/ymm, [mem]              (load)
///   - vop [mem], xmm/ymm              (store, if `store_opcode` given)
///
/// Fix: the 2-operand memory forms previously ignored `reloc`, so symbolic
/// displacements in `vmovaps xmm0, [sym]`-style forms were never recorded.
/// They now call `set_mem_reloc`, matching `encode_sse_rm`/`encode_sse_mr`.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_op(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    escape: &[u8],
    load_opcode: u8,
    store_opcode: Option<u8>,
    w: bool,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    use Operand::*;
    let o = (ops.first(), ops.get(1), ops.get(2));
    match o {
        // 3-operand: reg, reg, reg
        (Some(Register(dst)), Some(Register(src1)), Some(Register(src2)))
            if dst.is_vector() && src1.is_vector() && src2.is_vector() =>
        {
            // VEX.L = 256-bit when either written register is YMM.
            let l = dst.is_ymm() || src1.is_ymm();
            encode_vex_rrr(buf, pp, escape, load_opcode, *dst, *src1, *src2, w, l);
            Ok(())
        }
        // 3-operand: reg, reg, mem
        (Some(Register(dst)), Some(Register(src1)), Some(Memory(mem)))
            if dst.is_vector() && src1.is_vector() =>
        {
            let l = dst.is_ymm() || src1.is_ymm();
            encode_vex_rrm(buf, pp, escape, load_opcode, *dst, *src1, mem, reloc, w, l);
            Ok(())
        }
        // 2-operand reg, reg (move-like: vvvv unused => 0)
        (Some(Register(dst)), Some(Register(src)), None) if dst.is_vector() && src.is_vector() => {
            let l = dst.is_ymm() || src.is_ymm();
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                false,
                src.is_extended(),
                w,
                0,
                l,
                vex_pp(pp),
                escape,
            );
            buf.push(load_opcode);
            buf.push(0xC0 | (dst.base_code() << 3) | src.base_code());
            Ok(())
        }
        // 2-operand reg, mem (load)
        (Some(Register(dst)), Some(Memory(mem)), None) if dst.is_vector() => {
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                mem.index.is_some_and(|r| r.is_extended()),
                mem.base.is_some_and(|r| r.is_extended()),
                w,
                0,
                dst.is_ymm(),
                vex_pp(pp),
                escape,
            );
            buf.push(load_opcode);
            let disp_off = emit_mem_modrm(buf, dst.base_code(), mem);
            // Previously missing: record the relocation for symbolic displacements.
            set_mem_reloc(reloc, mem, disp_off, buf.len());
            Ok(())
        }
        // 2-operand mem, reg (store)
        (Some(Memory(mem)), Some(Register(src)), None)
            if src.is_vector() && store_opcode.is_some() =>
        {
            emit_vex_prefix(
                buf,
                src.is_extended(),
                mem.index.is_some_and(|r| r.is_extended()),
                mem.base.is_some_and(|r| r.is_extended()),
                w,
                0,
                src.is_ymm(),
                vex_pp(pp),
                escape,
            );
            // Match guard `store_opcode.is_some()` guarantees Some; the
            // fallback is never taken.
            buf.push(store_opcode.unwrap_or(load_opcode));
            let disp_off = emit_mem_modrm(buf, src.base_code(), mem);
            set_mem_reloc(reloc, mem, disp_off, buf.len());
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected VEX xmm/ymm operands",
            instr.span,
        )),
    }
}
4218
/// VEX instruction with a trailing immediate byte.
/// Handles:
///   - reg, reg, reg, imm8   (NDS form)
///   - reg, reg, mem, imm8
///   - reg, reg, imm8        (vvvv unused, like vpshufd)
///   - reg, mem, imm8        (vvvv unused)
///
/// Fix: the reg, mem, imm8 arm previously never recorded a relocation for
/// symbolic displacements; it now calls `set_mem_reloc` like the SSE path.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_imm(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    escape: &[u8],
    opcode: u8,
    w: bool,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    use Operand::*;
    let o = (ops.first(), ops.get(1), ops.get(2), ops.get(3));
    match o {
        // 4-operand: reg, reg, reg, imm8
        (Some(Register(dst)), Some(Register(src1)), Some(Register(src2)), Some(Immediate(imm)))
            if dst.is_vector() && src1.is_vector() && src2.is_vector() =>
        {
            let l = dst.is_ymm() || src1.is_ymm();
            encode_vex_rrr(buf, pp, escape, opcode, *dst, *src1, *src2, w, l);
            buf.push(*imm as u8);
            Ok(())
        }
        // 4-operand: reg, reg, mem, imm8
        (Some(Register(dst)), Some(Register(src1)), Some(Memory(mem)), Some(Immediate(imm)))
            if dst.is_vector() && src1.is_vector() =>
        {
            let l = dst.is_ymm() || src1.is_ymm();
            encode_vex_rrm(buf, pp, escape, opcode, *dst, *src1, mem, reloc, w, l);
            buf.push(*imm as u8);
            Ok(())
        }
        // 3-operand: reg, reg, imm8 (vvvv unused, like vpshufd)
        (Some(Register(dst)), Some(Register(src)), Some(Immediate(imm)), None)
            if dst.is_vector() && src.is_vector() =>
        {
            let l = dst.is_ymm() || src.is_ymm();
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                false,
                src.is_extended(),
                w,
                0,
                l,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            buf.push(0xC0 | (dst.base_code() << 3) | src.base_code());
            buf.push(*imm as u8);
            Ok(())
        }
        // 3-operand: reg, mem, imm8 (vvvv unused)
        (Some(Register(dst)), Some(Memory(mem)), Some(Immediate(imm)), None)
            if dst.is_vector() =>
        {
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                mem.index.is_some_and(|r| r.is_extended()),
                mem.base.is_some_and(|r| r.is_extended()),
                w,
                0,
                dst.is_ymm(),
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            let disp_off = emit_mem_modrm(buf, dst.base_code(), mem);
            // Recorded before the trailing imm8, matching `encode_sse_imm`'s
            // convention of passing the pre-imm end offset to `set_mem_reloc`.
            set_mem_reloc(reloc, mem, disp_off, buf.len());
            buf.push(*imm as u8);
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected VEX xmm/ymm operands with imm8",
            instr.span,
        )),
    }
}
4302
/// VEX-encoded BMI instruction covering the GP-register NDS/NDD forms.
///
/// Accepts two operand shapes:
///   - 3-operand RVM (e.g. `andn r32, r32, r/m32`): ModR/M.reg = dst,
///     VEX.vvvv = src1, ModR/M.rm = src2 or memory.
///   - 2-operand VM (e.g. `blsi r32, r/m32`): VEX.vvvv = dst,
///     ModR/M.rm = src or memory, with the ModR/M reg field emitted as 0.
///     Instructions whose reg field is a real /digit (BLSI /3, BLSR /1,
///     BLSMSK /2) must go through `encode_vex_bmi_digit` instead.
///
/// `w_from_size`: when true, VEX.W follows the destination width
/// (W = 1 for 64-bit registers); when false, W is always 0.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_bmi_vex_ndd(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    escape: &[u8],
    opcode: u8,
    w_from_size: bool,
) -> Result<(), AsmError> {
    use Operand::*;
    match (ops.first(), ops.get(1), ops.get(2)) {
        // 3-operand: r, r, r/m (e.g., andn eax, ebx, ecx)
        (Some(Register(dst)), Some(Register(src1)), Some(Register(src2))) => {
            // VEX.W = 1 selects the 64-bit operand form when requested.
            let w = if w_from_size {
                dst.size_bits() == 64
            } else {
                false
            };
            let mandatory_pp = vex_pp(pp);
            emit_vex_prefix(
                buf,
                dst.is_extended(),  // R: ModR/M.reg (dst) bit 3
                false,              // X: no SIB index in the reg-reg form
                src2.is_extended(), // B: ModR/M.rm (src2) bit 3
                w,
                // vvvv holds src1: 3-bit base code, extension folded in as bit 3
                src1.base_code() | if src1.is_extended() { 8 } else { 0 },
                false,
                mandatory_pp,
                escape,
            );
            buf.push(opcode);
            // mod=11 (register direct), reg=dst, rm=src2
            buf.push(0xC0 | (dst.base_code() << 3) | src2.base_code());
            Ok(())
        }
        // 3-operand: r, r, mem
        (Some(Register(dst)), Some(Register(src1)), Some(Memory(mem))) => {
            let w = if w_from_size {
                dst.size_bits() == 64
            } else {
                false
            };
            let mandatory_pp = vex_pp(pp);
            // X/B extend the SIB index/base registers, exactly as in REX.
            let x_ext = mem.index.is_some_and(|r| r.is_extended());
            let b_ext = mem.base.is_some_and(|r| r.is_extended());
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                x_ext,
                b_ext,
                w,
                src1.base_code() | if src1.is_extended() { 8 } else { 0 },
                false,
                mandatory_pp,
                escape,
            );
            buf.push(opcode);
            emit_mem_modrm(buf, dst.base_code(), mem);
            Ok(())
        }
        // 2-operand forms (e.g., blsi r32, r/m32 — dst in VEX.vvvv, src in ModR/M r/m)
        (Some(Register(dst)), Some(Register(src)), None) => {
            let w = if w_from_size {
                dst.size_bits() == 64
            } else {
                false
            };
            let mandatory_pp = vex_pp(pp);
            emit_vex_prefix(
                buf,
                false, // reg field is the /digit, not a real register
                false,
                src.is_extended(),
                w,
                dst.base_code() | if dst.is_extended() { 8 } else { 0 },
                false,
                mandatory_pp,
                escape,
            );
            buf.push(opcode);
            // NOTE(review): the ModR/M reg field is emitted as 0 here, so this
            // path only encodes correctly for instructions whose /digit is 0.
            // BLSI (/3), BLSR (/1), BLSMSK (/2) need `encode_vex_bmi_digit`;
            // verify no caller routes them through this function.
            buf.push(0xC0 | src.base_code());
            Ok(())
        }
        (Some(Register(dst)), Some(Memory(mem)), None) => {
            let w = if w_from_size {
                dst.size_bits() == 64
            } else {
                false
            };
            let mandatory_pp = vex_pp(pp);
            let x_ext = mem.index.is_some_and(|r| r.is_extended());
            let b_ext = mem.base.is_some_and(|r| r.is_extended());
            emit_vex_prefix(
                buf,
                false,
                x_ext,
                b_ext,
                w,
                dst.base_code() | if dst.is_extended() { 8 } else { 0 },
                false,
                mandatory_pp,
                escape,
            );
            buf.push(opcode);
            // reg field = 0 — same /digit caveat as the register form above.
            emit_mem_modrm(buf, 0, mem);
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected GP register operands",
            instr.span,
        )),
    }
}
4421
/// VEX BMI with /digit in the ModR/M reg field and VEX.vvvv = dst
/// (BLSI /3, BLSR /1, BLSMSK /2).
///
/// `w_from_size`: when true, VEX.W = 1 for 64-bit destinations; otherwise W = 0.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_bmi_digit(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    escape: &[u8],
    opcode: u8,
    digit: u8,
    w_from_size: bool,
) -> Result<(), AsmError> {
    use Operand::*;

    match (ops.first(), ops.get(1)) {
        // blsi r32/r64, r32/r64
        (Some(Register(dst)), Some(Register(src))) => {
            // VEX.W follows the destination width only when requested.
            let wide = w_from_size && dst.size_bits() == 64;
            // vvvv carries the destination (4-bit: base code + bit-3 extension).
            let vvvv = dst.base_code() | if dst.is_extended() { 8 } else { 0 };
            emit_vex_prefix(
                buf,
                false, // ModR/M.reg holds the /digit, never an extended register
                false,
                src.is_extended(),
                wide,
                vvvv,
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            // mod=11 (register direct), reg=/digit, rm=src
            buf.push(0xC0 | (digit << 3) | src.base_code());
            Ok(())
        }
        // blsi r32/r64, m32/m64
        (Some(Register(dst)), Some(Memory(mem))) => {
            let wide = w_from_size && dst.size_bits() == 64;
            let vvvv = dst.base_code() | if dst.is_extended() { 8 } else { 0 };
            emit_vex_prefix(
                buf,
                false,
                mem.index.is_some_and(|r| r.is_extended()),
                mem.base.is_some_and(|r| r.is_extended()),
                wide,
                vvvv,
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            emit_mem_modrm(buf, digit, mem);
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected r32/r64, r/m32/r/m64",
            instr.span,
        )),
    }
}
4489
/// VEX BMI2 with an imm8 operand (RORX: VEX.LZ.F2.0F3A.W0/W1 F0 /r ib).
///
/// `dst` goes in ModR/M.reg, `src` in ModR/M.rm; VEX.vvvv is unused (0).
/// `w_from_size`: when true, VEX.W = 1 for 64-bit destinations.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_bmi_imm(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    escape: &[u8],
    opcode: u8,
    w_from_size: bool,
) -> Result<(), AsmError> {
    use Operand::*;

    match (ops.first(), ops.get(1), ops.get(2)) {
        // rorx r, r, imm8
        (Some(Register(dst)), Some(Register(src)), Some(Immediate(imm))) => {
            let wide = w_from_size && dst.size_bits() == 64;
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                false,
                src.is_extended(),
                wide,
                0, // vvvv unused for this form
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            buf.push(0xC0 | (dst.base_code() << 3) | src.base_code());
            buf.push(*imm as u8);
            Ok(())
        }
        // rorx r, m, imm8
        (Some(Register(dst)), Some(Memory(mem)), Some(Immediate(imm))) => {
            let wide = w_from_size && dst.size_bits() == 64;
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                mem.index.is_some_and(|r| r.is_extended()),
                mem.base.is_some_and(|r| r.is_extended()),
                wide,
                0,
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            emit_mem_modrm(buf, dst.base_code(), mem);
            buf.push(*imm as u8);
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected r, r/m, imm8",
            instr.span,
        )),
    }
}
4558
/// VEX BMI instruction with reversed operand mapping: dst(reg), src(r/m),
/// control(vvvv). Used for BEXTR, BZHI, SARX, SHLX, SHRX where the second
/// operand is the ModR/M r/m field and the third operand is VEX.vvvv.
///
/// `w_from_size`: when true, VEX.W = 1 for 64-bit destinations.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_bmi_rmv(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    escape: &[u8],
    opcode: u8,
    w_from_size: bool,
) -> Result<(), AsmError> {
    use Operand::*;

    match (ops.first(), ops.get(1), ops.get(2)) {
        // r, r, r — dst in ModR/M.reg, src in ModR/M.rm, control in vvvv.
        (Some(Register(dst)), Some(Register(src)), Some(Register(ctrl))) => {
            let wide = w_from_size && dst.size_bits() == 64;
            let vvvv = ctrl.base_code() | if ctrl.is_extended() { 8 } else { 0 };
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                false,
                src.is_extended(),
                wide,
                vvvv,
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            buf.push(0xC0 | (dst.base_code() << 3) | src.base_code());
            Ok(())
        }
        // r, mem, r — same mapping with a memory r/m.
        (Some(Register(dst)), Some(Memory(mem)), Some(Register(ctrl))) => {
            let wide = w_from_size && dst.size_bits() == 64;
            let vvvv = ctrl.base_code() | if ctrl.is_extended() { 8 } else { 0 };
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                mem.index.is_some_and(|r| r.is_extended()),
                mem.base.is_some_and(|r| r.is_extended()),
                wide,
                vvvv,
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            emit_mem_modrm(buf, dst.base_code(), mem);
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected r, r/m, r",
            instr.span,
        )),
    }
}
4629
/// VEX-encoded packed shift instruction dispatcher.
///
/// Two encodings exist for the packed shifts:
///   1. `vpsllw xmm1, xmm2, xmm3/m128` — NDS form, uses `reg_opcode`,
///      ModR/M.reg = dst, VEX.vvvv = src1, ModR/M.rm = src2/mem.
///   2. `vpsllw xmm1, xmm2, imm8` — NDD form, uses `imm_opcode`,
///      with `digit` in the ModR/M reg field, VEX.vvvv = dst, rm = src.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_shift(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    escape: &[u8],
    reg_opcode: u8,
    imm_opcode: u8,
    digit: u8,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    use Operand::*;

    match (ops.first(), ops.get(1), ops.get(2)) {
        // Shift count in a vector register.
        (Some(Register(dst)), Some(Register(src1)), Some(Register(src2)))
            if dst.is_vector() && src1.is_vector() && src2.is_vector() =>
        {
            let wide_vec = dst.is_ymm() || src1.is_ymm();
            encode_vex_rrr(
                buf, pp, escape, reg_opcode, *dst, *src1, *src2, false, wide_vec,
            );
            Ok(())
        }
        // Shift count loaded from memory.
        (Some(Register(dst)), Some(Register(src1)), Some(Memory(mem)))
            if dst.is_vector() && src1.is_vector() =>
        {
            let wide_vec = dst.is_ymm() || src1.is_ymm();
            encode_vex_rrm(
                buf, pp, escape, reg_opcode, *dst, *src1, mem, reloc, false, wide_vec,
            );
            Ok(())
        }
        // Shift count as an immediate — NDD: vvvv = dst, rm = src, reg = /digit.
        (Some(Register(dst)), Some(Register(src)), Some(Immediate(imm)))
            if dst.is_vector() && src.is_vector() =>
        {
            let wide_vec = dst.is_ymm() || src.is_ymm();
            let vvvv = dst.base_code() | if dst.is_extended() { 8 } else { 0 };
            emit_vex_prefix(
                buf,
                false, // ModR/M.reg holds the /digit, never an extended register
                false,
                src.is_extended(),
                false, // W = 0 (WIG)
                vvvv,
                wide_vec,
                vex_pp(pp),
                escape,
            );
            buf.push(imm_opcode);
            buf.push(0xC0 | (digit << 3) | src.base_code());
            buf.push(*imm as u8);
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected VEX xmm/ymm shift operands",
            instr.span,
        )),
    }
}
4702
/// VEX-encoded instruction with mixed GP/XMM operands.
///
/// Handles conversions like VCVTSI2SS (xmm, xmm, r/m32/64):
///   - dst(xmm) = ModR/M.reg, src1(xmm) = VEX.vvvv (NDS), src2(GP/mem) = ModR/M.rm
///
/// Also handles the reverse direction (r32/r64, xmm/mem): VCVTSS2SI,
/// VCVTTSS2SI. VEX.W tracks the GP operand width (register size, or the
/// declared memory size for the 3-operand memory form).
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_vex_cvt(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    escape: &[u8],
    opcode: u8,
    _reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    use Operand::*;

    match (ops.first(), ops.get(1), ops.get(2)) {
        // vcvtsi2ss xmm, xmm, r32/r64 — W follows the GP source width.
        (Some(Register(dst)), Some(Register(src1)), Some(Register(src2)))
            if dst.is_xmm() && src1.is_xmm() =>
        {
            let vvvv = src1.base_code() | if src1.is_extended() { 8 } else { 0 };
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                false,
                src2.is_extended(),
                src2.size_bits() == 64,
                vvvv,
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            buf.push(0xC0 | (dst.base_code() << 3) | src2.base_code());
            Ok(())
        }
        // vcvtsi2ss xmm, xmm, m32/m64 — W follows the declared memory size.
        (Some(Register(dst)), Some(Register(src1)), Some(Memory(mem)))
            if dst.is_xmm() && src1.is_xmm() =>
        {
            let vvvv = src1.base_code() | if src1.is_extended() { 8 } else { 0 };
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                mem.index.is_some_and(|r| r.is_extended()),
                mem.base.is_some_and(|r| r.is_extended()),
                mem.size.is_some_and(|s| s.bits() == 64),
                vvvv,
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            emit_mem_modrm(buf, dst.base_code(), mem);
            Ok(())
        }
        // vcvtss2si r32/r64, xmm — W follows the GP destination width.
        (Some(Register(dst)), Some(Register(src)), None) if !dst.is_vector() && src.is_xmm() => {
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                false,
                src.is_extended(),
                dst.size_bits() == 64,
                0, // vvvv unused
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            buf.push(0xC0 | (dst.base_code() << 3) | src.base_code());
            Ok(())
        }
        // vcvtss2si r32/r64, m32
        (Some(Register(dst)), Some(Memory(mem)), None) if !dst.is_vector() => {
            emit_vex_prefix(
                buf,
                dst.is_extended(),
                mem.index.is_some_and(|r| r.is_extended()),
                mem.base.is_some_and(|r| r.is_extended()),
                dst.size_bits() == 64,
                0,
                false,
                vex_pp(pp),
                escape,
            );
            buf.push(opcode);
            emit_mem_modrm(buf, dst.base_code(), mem);
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected conversion operands",
            instr.span,
        )),
    }
}
4811
4812// ─── EVEX prefix encoding infrastructure (AVX-512) ──────────────────────────
4813//
4814// EVEX is a 4-byte prefix (62h + P0 P1 P2) used by AVX-512 instructions.
4815//
4816// P0: [R  X  B  R' 0 0 m m]      — R/X/B from legacy REX, R' extends ModR/M.reg to 5 bits, mm = map
4817// P1: [W  v3 v2 v1 v0 1  p p]    — W, vvvv (inverted NDS src), fixed 1, pp
4818// P2: [z  L' L  b  V' a a a]     — z=zeroing, L'L=vector length, b=broadcast, V' extends vvvv, aaa=opmask
4819//
4820// Vector length: L'L = 00 → 128, 01 → 256, 10 → 512
4821
/// Emit a 4-byte EVEX prefix: 62 [P0] [P1] [P2]
///
/// R, X, B, R', vvvv, and V' are all stored *complemented* in the prefix,
/// so "extended" inputs clear their bit and plain ones set it.
///
/// Parameters mirror the prefix fields:
/// - `r_ext`/`x_ext`/`b_ext`: REX-style bit-3 extensions of ModR/M.reg,
///   SIB.index, and ModR/M.rm / SIB.base respectively
/// - `r_prime`/`v_prime`: bit-4 extensions of ModR/M.reg and vvvv (regs 16-31)
/// - `mm`: map select (01 = 0F, 02 = 0F38, 03 = 0F3A)
/// - `w`: operand size promotion (like REX.W)
/// - `vvvv`: NDS source register (4-bit)
/// - `pp`: implied mandatory prefix (00=none, 01=66, 10=F3, 11=F2)
/// - `z`: zeroing-masking (1 = zero, 0 = merge)
/// - `ll`: vector length (0=128, 1=256, 2=512)
/// - `b_bit`: broadcast / rounding control / SAE
/// - `aaa`: opmask register number (0 = no mask)
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_evex(
    buf: &mut InstrBytes,
    r_ext: bool,
    x_ext: bool,
    b_ext: bool,
    r_prime: bool,
    mm: u8,
    w: bool,
    vvvv: u8,
    v_prime: bool,
    pp: u8,
    z: bool,
    ll: u8,
    b_bit: bool,
    aaa: u8,
) {
    // Inverted-polarity helper: the mask bit reads 1 when NOT extended.
    let inv = |ext: bool, mask: u8| if ext { 0 } else { mask };

    // P0: ~R ~X ~B ~R' 0 0 mm
    let p0 =
        inv(r_ext, 0x80) | inv(x_ext, 0x40) | inv(b_ext, 0x20) | inv(r_prime, 0x10) | (mm & 0x03);

    // P1: W ~v3 ~v2 ~v1 ~v0 1 pp  (bit 2 is architecturally fixed to 1)
    let p1 = (if w { 0x80 } else { 0 }) | ((!vvvv & 0x0F) << 3) | 0x04 | (pp & 0x03);

    // P2: z L'L b ~V' aaa
    let mut p2 = ((ll & 0x03) << 5) | inv(v_prime, 0x08) | (aaa & 0x07);
    if z {
        p2 |= 0x80;
    }
    if b_bit {
        p2 |= 0x10;
    }

    buf.extend_from_slice(&[0x62, p0, p1, p2]);
}
4880
/// Decompose a register into its EVEX encoding pieces:
/// (3-bit base code, REX-style bit-3 extension, EVEX-only bit-4 extension).
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn evex_reg_bits(reg: Register) -> (u8, bool, bool) {
    let base = reg.base_code();
    let ext3 = reg.is_extended();
    let ext4 = reg.is_evex_extended();
    (base, ext3, ext4)
}
4887
/// Map a vector register to the EVEX L'L length field:
/// 0 = 128-bit (XMM), 1 = 256-bit (YMM), 2 = 512-bit (ZMM).
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn evex_ll(reg: Register) -> u8 {
    match (reg.is_zmm(), reg.is_ymm()) {
        (true, _) => 2,
        (_, true) => 1,
        _ => 0,
    }
}
4900
/// Emit the EVEX prefix for a reg,reg,reg instruction.
/// `dst` = ModR/M.reg (destination), `src1` = vvvv (NDS), `src2` = ModR/M.rm.
///
/// `aaa` and `z` support opmask + zeroing; pass aaa=0, z=false for unmasked.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_evex_prefix_rrr(
    buf: &mut InstrBytes,
    dst: Register,
    src1: Register,
    src2: Register,
    mm: u8,
    w: bool,
    pp: u8,
    ll: u8,
    aaa: u8,
    z: bool,
) {
    // The NDS source occupies vvvv (bit 3 folded in; bit 4 travels via V').
    let vvvv = src1.base_code() | if src1.is_extended() { 8 } else { 0 };

    emit_evex(
        buf,
        dst.is_extended(),       // R:  ModR/M.reg bit 3
        src2.is_evex_extended(), // X:  r/m bit 4 (repurposed in reg-reg form)
        src2.is_extended(),      // B:  r/m bit 3
        dst.is_evex_extended(),  // R': ModR/M.reg bit 4
        mm,
        w,
        vvvv,
        src1.is_evex_extended(), // V': vvvv bit 4
        pp,
        z,
        ll,
        false, // b: broadcast never applies to reg-reg
        aaa,
    );
}
4934
/// Emit the EVEX prefix for a reg,reg,mem instruction.
/// `dst` = ModR/M.reg (destination), `src1` = vvvv (NDS), `mem` = memory operand.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn emit_evex_prefix_rrm(
    buf: &mut InstrBytes,
    dst: Register,
    src1: Register,
    mem: &MemoryOperand,
    mm: u8,
    w: bool,
    pp: u8,
    ll: u8,
    b_bit: bool,
    aaa: u8,
    z: bool,
) {
    // The NDS source occupies vvvv (bit 3 folded in; bit 4 travels via V').
    let vvvv = src1.base_code() | if src1.is_extended() { 8 } else { 0 };
    // X/B extend the SIB index/base registers, exactly as in REX.
    let x_ext = mem.index.is_some_and(|r| r.is_extended());
    let b_ext = mem.base.is_some_and(|r| r.is_extended());

    emit_evex(
        buf,
        dst.is_extended(),
        x_ext,
        b_ext,
        dst.is_evex_extended(),
        mm,
        w,
        vvvv,
        src1.is_evex_extended(),
        pp,
        z,
        ll,
        b_bit,
        aaa,
    );
}
4962
/// EVEX opmask field (`aaa`): the k-register number recorded by the parser's
/// `{k}` suffix, or 0 (k0 — unmasked) when none was given.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn evex_aaa(instr: &Instruction) -> u8 {
    instr.opmask.map_or(0, |reg| reg.base_code())
}

/// EVEX broadcast bit (`b`): set when the parser recorded a broadcast suffix.
#[cfg(any(feature = "x86", feature = "x86_64"))]
fn evex_broadcast_bit(instr: &Instruction) -> bool {
    instr.broadcast.is_some()
}

/// Generic EVEX-encoded AVX-512 instruction dispatcher.
/// Handles the common patterns:
///   - evex_op zmm, zmm, zmm    (3 register operands, NDS form)
///   - evex_op zmm, zmm, [mem]  (reg, reg, mem — optional broadcast)
///   - evex_op zmm, zmm         (2-operand reg-reg, vvvv=0)
///   - evex_op zmm, [mem]       (2-operand load, `load_opcode`)
///   - evex_op [mem], zmm       (2-operand store, `store_opcode`; instructions
///     without a store form pass `None` and the store pattern is rejected)
///
/// Opmask/zeroing/broadcast are read from `instr.opmask`, `instr.zeroing`,
/// and `instr.broadcast` fields set by the parser.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_evex_op(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    mm: u8,
    load_opcode: u8,
    store_opcode: Option<u8>,
    w: bool,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    use Operand::*;
    let mandatory_pp = vex_pp(pp);
    let o = (ops.first(), ops.get(1), ops.get(2));
    match o {
        // 3-operand: reg, reg, reg
        (Some(Register(dst)), Some(Register(src1)), Some(Register(src2)))
            if dst.is_vector() && src1.is_vector() && src2.is_vector() =>
        {
            let ll = evex_ll(*dst);
            let aaa = evex_aaa(instr);
            emit_evex_prefix_rrr(
                buf,
                *dst,
                *src1,
                *src2,
                mm,
                w,
                mandatory_pp,
                ll,
                aaa,
                instr.zeroing,
            );
            buf.push(load_opcode);
            buf.push(0xC0 | (dst.base_code() << 3) | src2.base_code());
            Ok(())
        }
        // 3-operand: reg, reg, mem
        (Some(Register(dst)), Some(Register(src1)), Some(Memory(mem)))
            if dst.is_vector() && src1.is_vector() =>
        {
            let ll = evex_ll(*dst);
            let aaa = evex_aaa(instr);
            let b_bit = evex_broadcast_bit(instr);
            emit_evex_prefix_rrm(
                buf,
                *dst,
                *src1,
                mem,
                mm,
                w,
                mandatory_pp,
                ll,
                b_bit,
                aaa,
                instr.zeroing,
            );
            buf.push(load_opcode);
            emit_mem_modrm(buf, dst.base_code(), mem);
            // NOTE(review): assumes a pending relocation implies the memory
            // operand just ended with a 4-byte displacement — confirm against
            // emit_mem_modrm's symbol-operand lowering.
            if let Some(rel) = reloc.as_mut() {
                rel.offset = buf.len() - 4;
            }
            Ok(())
        }
        // 2-operand reg, reg (move-like: vvvv unused, use K0 as dummy NDS)
        (Some(Register(dst)), Some(Register(src)), None) if dst.is_vector() && src.is_vector() => {
            let ll = evex_ll(*dst);
            let aaa = evex_aaa(instr);
            // vvvv = 0 (K0 stand-in for unused NDS)
            let (_, src_ext, src_evex) = evex_reg_bits(*src);
            let (_, dst_ext, dst_evex) = evex_reg_bits(*dst);
            emit_evex(
                buf,
                dst_ext,
                src_evex, // X: r/m bit 4 (repurposed in reg-reg form)
                src_ext,
                dst_evex,
                mm,
                w,
                0,
                false,
                mandatory_pp,
                instr.zeroing,
                ll,
                false,
                aaa,
            );
            buf.push(load_opcode);
            buf.push(0xC0 | (dst.base_code() << 3) | src.base_code());
            Ok(())
        }
        // 2-operand reg, mem (load)
        (Some(Register(dst)), Some(Memory(mem)), None) if dst.is_vector() => {
            let ll = evex_ll(*dst);
            let aaa = evex_aaa(instr);
            let b_bit = evex_broadcast_bit(instr);
            let (_, dst_ext, dst_evex) = evex_reg_bits(*dst);
            let x_ext = mem.index.is_some_and(|r| r.is_extended());
            let b_ext = mem.base.is_some_and(|r| r.is_extended());
            emit_evex(
                buf,
                dst_ext,
                x_ext,
                b_ext,
                dst_evex,
                mm,
                w,
                0,
                false,
                mandatory_pp,
                instr.zeroing,
                ll,
                b_bit,
                aaa,
            );
            buf.push(load_opcode);
            emit_mem_modrm(buf, dst.base_code(), mem);
            if let Some(rel) = reloc.as_mut() {
                rel.offset = buf.len() - 4;
            }
            Ok(())
        }
        // 2-operand mem, reg (store)
        (Some(Memory(mem)), Some(Register(src)), None) if src.is_vector() => {
            // Instructions without a store form reject this pattern with the
            // same diagnostic as the catch-all arm below.
            let Some(opcode) = store_opcode else {
                return Err(invalid_operands(
                    &instr.mnemonic,
                    "expected EVEX zmm/ymm/xmm operands",
                    instr.span,
                ));
            };
            let ll = evex_ll(*src);
            let aaa = evex_aaa(instr);
            let (_, src_ext, src_evex) = evex_reg_bits(*src);
            let x_ext = mem.index.is_some_and(|r| r.is_extended());
            let b_ext = mem.base.is_some_and(|r| r.is_extended());
            emit_evex(
                buf,
                src_ext,
                x_ext,
                b_ext,
                src_evex,
                mm,
                w,
                0,
                false,
                mandatory_pp,
                instr.zeroing,
                ll,
                false,
                aaa,
            );
            buf.push(opcode);
            emit_mem_modrm(buf, src.base_code(), mem);
            if let Some(rel) = reloc.as_mut() {
                rel.offset = buf.len() - 4;
            }
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected EVEX zmm/ymm/xmm operands",
            instr.span,
        )),
    }
}
5146
/// EVEX instruction with immediate byte (4 operands: dst, src1, src2/mem, imm8)
///
/// Accepted operand shapes:
/// * `reg, reg, reg, imm8` — NDS form; `src1` is carried in EVEX.vvvv.
/// * `reg, reg, mem, imm8` — as above, with a memory r/m operand.
/// * `reg, reg, imm8`      — NDS-free form; vvvv is left clear.
///
/// `pp` selects the mandatory prefix (66/F3/F2), `mm` the opcode map,
/// `w` the EVEX.W bit.  For the memory form, an existing `reloc` is
/// re-pointed at the displacement bytes emitted by `emit_mem_modrm`.
#[cfg(any(feature = "x86", feature = "x86_64"))]
pub(crate) fn encode_evex_imm(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    mm: u8,
    opcode: u8,
    w: bool,
    reloc: &mut Option<Relocation>,
) -> Result<(), AsmError> {
    use Operand::*;
    let mandatory_pp = vex_pp(pp);
    match (ops.first(), ops.get(1), ops.get(2), ops.get(3)) {
        // reg, reg, reg, imm8
        (Some(Register(dst)), Some(Register(src1)), Some(Register(src2)), Some(Immediate(imm)))
            if dst.is_vector() && src1.is_vector() && src2.is_vector() =>
        {
            // Vector length (EVEX.L'L) is derived from the destination class.
            let ll = evex_ll(*dst);
            // Opmask selector (EVEX.aaa) from the instruction's {k} annotation.
            let aaa = evex_aaa(instr);
            emit_evex_prefix_rrr(
                buf,
                *dst,
                *src1,
                *src2,
                mm,
                w,
                mandatory_pp,
                ll,
                aaa,
                instr.zeroing,
            );
            buf.push(opcode);
            // Register-direct ModR/M: mod=11, reg=dst, r/m=src2.
            buf.push(0xC0 | (dst.base_code() << 3) | src2.base_code());
            buf.push(*imm as u8);
            Ok(())
        }
        // reg, reg, mem, imm8
        (Some(Register(dst)), Some(Register(src1)), Some(Memory(mem)), Some(Immediate(imm)))
            if dst.is_vector() && src1.is_vector() =>
        {
            let ll = evex_ll(*dst);
            let aaa = evex_aaa(instr);
            // EVEX.b doubles as the broadcast bit for memory operands.
            let b_bit = evex_broadcast_bit(instr);
            emit_evex_prefix_rrm(
                buf,
                *dst,
                *src1,
                mem,
                mm,
                w,
                mandatory_pp,
                ll,
                b_bit,
                aaa,
                instr.zeroing,
            );
            buf.push(opcode);
            emit_mem_modrm(buf, dst.base_code(), mem);
            // The imm8 has not been pushed yet, so `len() - 4` addresses the
            // last 4 bytes of the ModR/M encoding.  NOTE(review): this assumes
            // the addressing mode emitted a disp32 (presumably true for
            // label-based operands) — confirm against emit_mem_modrm.
            if let Some(ref mut rel) = reloc {
                rel.offset = buf.len() - 4;
            }
            buf.push(*imm as u8);
            Ok(())
        }
        // 3-operand with imm: reg, reg/mem, imm8 (some instructions are NDS-free)
        (Some(Register(dst)), Some(Register(src)), Some(Immediate(imm)), None)
            if dst.is_vector() && src.is_vector() =>
        {
            let ll = evex_ll(*dst);
            let aaa = evex_aaa(instr);
            let (_, src_ext, src_evex) = evex_reg_bits(*src);
            let (_, dst_ext, dst_evex) = evex_reg_bits(*dst);
            // NOTE(review): for a reg-reg form the r/m register (src) supplies
            // EVEX.B and, via its high bit, EVEX.X; dst supplies R/R'.  The
            // argument order mirrors the other reg-reg emit_evex call sites in
            // this file — confirm against emit_evex's parameter list.
            emit_evex(
                buf,
                dst_ext,
                src_evex,
                src_ext,
                dst_evex,
                mm,
                w,
                0,
                false,
                mandatory_pp,
                instr.zeroing,
                ll,
                false,
                aaa,
            );
            buf.push(opcode);
            // mod=11, reg=dst, r/m=src.
            buf.push(0xC0 | (dst.base_code() << 3) | src.base_code());
            buf.push(*imm as u8);
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected EVEX operands with immediate",
            instr.span,
        )),
    }
}
5249
/// EVEX opmask instruction: `KADDB k1, k2, k3` etc.
/// Opmask-to-opmask operations use EVEX with vvvv=k, reg=k, r/m=k.
/// Will be wired up when opmask instruction dispatch is added.
#[cfg(any(feature = "x86", feature = "x86_64"))]
#[allow(dead_code)]
pub(crate) fn encode_evex_opmask(
    buf: &mut InstrBytes,
    ops: &[Operand],
    instr: &Instruction,
    pp: u8,
    _mm: u8,
    opcode: u8,
    w: bool,
) -> Result<(), AsmError> {
    use Operand::*;
    let pp_bits = vex_pp(pp);
    // Register-direct ModR/M byte: mod=11, reg=dst, r/m=src.
    let modrm_direct = |reg: u8, rm: u8| 0xC0 | (reg << 3) | rm;
    match (ops.first(), ops.get(1), ops.get(2)) {
        // 3-operand: k, k, k (e.g., KADDB k1, k2, k3)
        (Some(Register(dst)), Some(Register(src1)), Some(Register(src2)))
            if dst.is_opmask() && src1.is_opmask() && src2.is_opmask() =>
        {
            // Per the Intel SDM, opmask arithmetic is VEX-encoded (VEX.L=1
            // for most ops) rather than EVEX; src1 rides in VEX.vvvv.
            emit_vex_prefix(
                buf,
                false,
                false,
                false,
                w,
                src1.base_code(),
                true,
                pp_bits,
                &[0x0F],
            );
            buf.push(opcode);
            buf.push(modrm_direct(dst.base_code(), src2.base_code()));
            Ok(())
        }
        // 2-operand: k, k (e.g., KNOTB k1, k2) — unary form, vvvv stays 0.
        (Some(Register(dst)), Some(Register(src)), None)
            if dst.is_opmask() && src.is_opmask() =>
        {
            emit_vex_prefix(buf, false, false, false, w, 0, true, pp_bits, &[0x0F]);
            buf.push(opcode);
            buf.push(modrm_direct(dst.base_code(), src.base_code()));
            Ok(())
        }
        _ => Err(invalid_operands(
            &instr.mnemonic,
            "expected opmask register operands (k0-k7)",
            instr.span,
        )),
    }
}
5303
5304#[cfg(test)]
5305mod tests {
5306    use super::*;
5307    use crate::error::Span;
5308
    /// Dummy source span (line 1, column 1) for synthesized test instructions.
    fn span() -> Span {
        Span::new(1, 1, 0, 0)
    }
5312
5313    fn make_instr(mnemonic: &str, operands: Vec<Operand>) -> Instruction {
5314        Instruction {
5315            mnemonic: Mnemonic::from(mnemonic),
5316            operands: OperandList::from(operands),
5317            size_hint: None,
5318            prefixes: PrefixList::new(),
5319            opmask: None,
5320            zeroing: false,
5321            broadcast: None,
5322            span: span(),
5323        }
5324    }
5325
5326    fn make_instr_with_hint(
5327        mnemonic: &str,
5328        operands: Vec<Operand>,
5329        hint: Option<OperandSize>,
5330    ) -> Instruction {
5331        Instruction {
5332            mnemonic: Mnemonic::from(mnemonic),
5333            operands: OperandList::from(operands),
5334            size_hint: hint,
5335            prefixes: PrefixList::new(),
5336            opmask: None,
5337            zeroing: false,
5338            broadcast: None,
5339            span: span(),
5340        }
5341    }
5342
5343    fn encode(mnemonic: &str, operands: Vec<Operand>) -> Vec<u8> {
5344        let instr = make_instr(mnemonic, operands);
5345        encode_instruction(&instr, Arch::X86_64)
5346            .unwrap()
5347            .bytes
5348            .to_vec()
5349    }
5350
5351    fn encode_with_hint(mnemonic: &str, operands: Vec<Operand>, hint: OperandSize) -> Vec<u8> {
5352        let mut instr = make_instr(mnemonic, operands);
5353        instr.size_hint = Some(hint);
5354        encode_instruction(&instr, Arch::X86_64)
5355            .unwrap()
5356            .bytes
5357            .to_vec()
5358    }
5359
5360    fn encode_with_prefix(mnemonic: &str, operands: Vec<Operand>, prefix: Prefix) -> Vec<u8> {
5361        let mut instr = make_instr(mnemonic, operands);
5362        instr.prefixes = PrefixList::from(alloc::vec![prefix]);
5363        encode_instruction(&instr, Arch::X86_64)
5364            .unwrap()
5365            .bytes
5366            .to_vec()
5367    }
5368
5369    use crate::ir::Register::*;
5370    use Operand::*;
5371
    // === Zero-operand instructions ===

    #[test]
    fn test_nop() {
        // nop → 90
        assert_eq!(encode("nop", vec![]), vec![0x90]);
    }

    #[test]
    fn test_ret() {
        // ret (near) → C3
        assert_eq!(encode("ret", vec![]), vec![0xC3]);
    }

    #[test]
    fn test_syscall() {
        // syscall → 0F 05
        assert_eq!(encode("syscall", vec![]), vec![0x0F, 0x05]);
    }

    #[test]
    fn test_int3() {
        // int3 → CC (dedicated breakpoint opcode, not CD 03)
        assert_eq!(encode("int3", vec![]), vec![0xCC]);
    }

    #[test]
    fn test_int_0x80() {
        // int imm8 → CD ib
        assert_eq!(encode("int", vec![Immediate(0x80)]), vec![0xCD, 0x80]);
    }

    #[test]
    fn test_hlt() {
        // hlt → F4
        assert_eq!(encode("hlt", vec![]), vec![0xF4]);
    }

    // === MOV reg, reg ===

    #[test]
    fn test_mov_rax_rbx() {
        // REX.W + 89 /r (mov r/m64, r64)
        let bytes = encode("mov", vec![Register(Rax), Register(Rbx)]);
        assert_eq!(bytes, vec![0x48, 0x89, 0xD8]);
    }

    #[test]
    fn test_mov_eax_ecx() {
        // 89 /r (mov r/m32, r32)
        let bytes = encode("mov", vec![Register(Eax), Register(Ecx)]);
        assert_eq!(bytes, vec![0x89, 0xC8]);
    }

    #[test]
    fn test_mov_r8_r9() {
        // REX.W+REX.R+REX.B + 89 /r
        let bytes = encode("mov", vec![Register(R8), Register(R9)]);
        assert_eq!(bytes, vec![0x4D, 0x89, 0xC8]);
    }

    #[test]
    fn test_mov_al_bl() {
        // mov r/m8, r8 → 88 /r (no REX needed for legacy low-byte registers)
        let bytes = encode("mov", vec![Register(Al), Register(Bl)]);
        assert_eq!(bytes, vec![0x88, 0xD8]);
    }
5432
    // === MOV reg, imm ===

    #[test]
    fn test_mov_eax_1() {
        // mov eax, 1 → B8+rd imm32
        let bytes = encode("mov", vec![Register(Eax), Immediate(1)]);
        assert_eq!(bytes, vec![0xB8, 0x01, 0x00, 0x00, 0x00]);
    }

    #[test]
    fn test_mov_al_0xff() {
        // mov al, 0xFF → B0+rb imm8
        let bytes = encode("mov", vec![Register(Al), Immediate(0xFF)]);
        assert_eq!(bytes, vec![0xB0, 0xFF]);
    }

    #[test]
    fn test_mov_rax_small_imm() {
        // Should use mov eax, imm32 (zero extends to rax)
        let bytes = encode("mov", vec![Register(Rax), Immediate(1)]);
        assert_eq!(bytes, vec![0xB8, 0x01, 0x00, 0x00, 0x00]);
    }

    #[test]
    fn test_mov_rax_neg1() {
        // mov rax, -1 → REX.W C7 /0 imm32 (sign-extended)
        let bytes = encode("mov", vec![Register(Rax), Immediate(-1)]);
        assert_eq!(bytes, vec![0x48, 0xC7, 0xC0, 0xFF, 0xFF, 0xFF, 0xFF]);
    }

    #[test]
    fn test_mov_rax_large_imm() {
        // movabs rax, imm64
        let bytes = encode("mov", vec![Register(Rax), Immediate(0x0102030405060708)]);
        assert_eq!(
            bytes,
            vec![0x48, 0xB8, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01]
        );
    }

    #[test]
    fn test_mov_r8d_imm() {
        // mov r8d, 42 → REX.B B8+rd imm32
        let bytes = encode("mov", vec![Register(R8d), Immediate(42)]);
        assert_eq!(bytes, vec![0x41, 0xB8, 0x2A, 0x00, 0x00, 0x00]);
    }

    // === MOV reg, [mem] and [mem], reg ===

    #[test]
    fn test_mov_rax_mem_rbx() {
        // mov rax, [rbx] → REX.W 8B 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode("mov", vec![Register(Rax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0x48, 0x8B, 0x03]);
    }

    #[test]
    fn test_mov_mem_rbx_rax() {
        // mov [rbx], rax → REX.W 89 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode("mov", vec![Memory(Box::new(mem)), Register(Rax)]);
        assert_eq!(bytes, vec![0x48, 0x89, 0x03]);
    }

    #[test]
    fn test_mov_rax_mem_rbp_disp8() {
        // mov rax, [rbp + 8] → REX.W 8B 45 08
        // (RBP base always needs a displacement; 8 fits in disp8)
        let mem = MemoryOperand {
            base: Some(Rbp),
            disp: 8,
            ..Default::default()
        };
        let bytes = encode("mov", vec![Register(Rax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0x48, 0x8B, 0x45, 0x08]);
    }

    #[test]
    fn test_mov_rax_mem_rbp_disp32() {
        // mov rax, [rbp + 0x200] → REX.W 8B 85 00 02 00 00
        let mem = MemoryOperand {
            base: Some(Rbp),
            disp: 0x200,
            ..Default::default()
        };
        let bytes = encode("mov", vec![Register(Rax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0x48, 0x8B, 0x85, 0x00, 0x02, 0x00, 0x00]);
    }
5524
    // === PUSH / POP ===

    #[test]
    fn test_push_rbp() {
        // push rbp → 50+rd short form (55)
        assert_eq!(encode("push", vec![Register(Rbp)]), vec![0x55]);
    }

    #[test]
    fn test_push_r12() {
        // push r12 → REX.B + 50+rd
        assert_eq!(encode("push", vec![Register(R12)]), vec![0x41, 0x54]);
    }

    #[test]
    fn test_pop_rbp() {
        // pop rbp → 58+rd short form (5D)
        assert_eq!(encode("pop", vec![Register(Rbp)]), vec![0x5D]);
    }

    #[test]
    fn test_push_imm8() {
        // push imm8 → 6A ib
        assert_eq!(encode("push", vec![Immediate(1)]), vec![0x6A, 0x01]);
    }

    #[test]
    fn test_push_imm32() {
        // push imm32 → 68 id (value doesn't fit in imm8)
        assert_eq!(
            encode("push", vec![Immediate(0x1000)]),
            vec![0x68, 0x00, 0x10, 0x00, 0x00]
        );
    }

    // === ALU ===

    #[test]
    fn test_add_rax_rbx() {
        let bytes = encode("add", vec![Register(Rax), Register(Rbx)]);
        assert_eq!(bytes, vec![0x48, 0x01, 0xD8]); // REX.W 01 /r
    }

    #[test]
    fn test_add_eax_1() {
        // add eax, 1 → 83 C0 01 (sign-extended imm8)
        let bytes = encode("add", vec![Register(Eax), Immediate(1)]);
        assert_eq!(bytes, vec![0x83, 0xC0, 0x01]);
    }

    #[test]
    fn test_sub_rsp_8() {
        // sub rsp, 8 → REX.W 83 EC 08
        let bytes = encode("sub", vec![Register(Rsp), Immediate(8)]);
        assert_eq!(bytes, vec![0x48, 0x83, 0xEC, 0x08]);
    }

    #[test]
    fn test_xor_eax_eax() {
        // xor eax, eax → 31 C0 (classic zeroing idiom)
        let bytes = encode("xor", vec![Register(Eax), Register(Eax)]);
        assert_eq!(bytes, vec![0x31, 0xC0]);
    }

    #[test]
    fn test_cmp_rax_0() {
        // cmp rax, 0 → REX.W 83 /7 ib (sign-extended imm8 form)
        let bytes = encode("cmp", vec![Register(Rax), Immediate(0)]);
        assert_eq!(bytes, vec![0x48, 0x83, 0xF8, 0x00]);
    }

    #[test]
    fn test_and_al_imm() {
        // and al, 0x0F → 24 0F (short form)
        let bytes = encode("and", vec![Register(Al), Immediate(0x0F)]);
        assert_eq!(bytes, vec![0x24, 0x0F]);
    }

    #[test]
    fn test_or_eax_large_imm() {
        // or eax, 0x1000 → 0D 00 10 00 00 (short form for eax)
        let bytes = encode("or", vec![Register(Eax), Immediate(0x1000)]);
        assert_eq!(bytes, vec![0x0D, 0x00, 0x10, 0x00, 0x00]);
    }

    // === TEST ===

    #[test]
    fn test_test_al_imm() {
        // test al, imm8 → A8 ib (accumulator short form)
        let bytes = encode("test", vec![Register(Al), Immediate(1)]);
        assert_eq!(bytes, vec![0xA8, 0x01]);
    }

    #[test]
    fn test_test_eax_eax() {
        // test eax, eax → 85 /r
        let bytes = encode("test", vec![Register(Eax), Register(Eax)]);
        assert_eq!(bytes, vec![0x85, 0xC0]);
    }
5616
    // === Shifts ===

    #[test]
    fn test_shl_eax_1() {
        // shl eax, 1 → D1 /4 (dedicated shift-by-one form, no imm byte)
        let bytes = encode("shl", vec![Register(Eax), Immediate(1)]);
        assert_eq!(bytes, vec![0xD1, 0xE0]);
    }

    #[test]
    fn test_shr_rcx_4() {
        // shr rcx, 4 → REX.W C1 /5 ib
        let bytes = encode("shr", vec![Register(Rcx), Immediate(4)]);
        assert_eq!(bytes, vec![0x48, 0xC1, 0xE9, 0x04]);
    }

    #[test]
    fn test_sar_rax_cl() {
        // sar rax, cl → REX.W D3 /7 (variable count in CL)
        let bytes = encode("sar", vec![Register(Rax), Register(Cl)]);
        assert_eq!(bytes, vec![0x48, 0xD3, 0xF8]);
    }

    // === INC / DEC ===

    #[test]
    fn test_inc_rax() {
        // inc rax → REX.W FF /0 (no 40+rd short form in 64-bit mode)
        let bytes = encode("inc", vec![Register(Rax)]);
        assert_eq!(bytes, vec![0x48, 0xFF, 0xC0]);
    }

    #[test]
    fn test_dec_ecx() {
        // dec ecx → FF /1
        let bytes = encode("dec", vec![Register(Ecx)]);
        assert_eq!(bytes, vec![0xFF, 0xC9]);
    }

    // === NEG / NOT ===

    #[test]
    fn test_neg_rax() {
        // neg rax → REX.W F7 /3
        let bytes = encode("neg", vec![Register(Rax)]);
        assert_eq!(bytes, vec![0x48, 0xF7, 0xD8]);
    }

    #[test]
    fn test_not_eax() {
        // not eax → F7 /2
        let bytes = encode("not", vec![Register(Eax)]);
        assert_eq!(bytes, vec![0xF7, 0xD0]);
    }
5664
    // === JMP / CALL ===

    #[test]
    fn test_jmp_label_relocation() {
        // jmp label → E9 with a zeroed rel32 placeholder plus a relocation
        // the linker/assembler driver patches later.
        let instr = make_instr("jmp", vec![Label(String::from("target"))]);
        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
        assert_eq!(result.bytes, vec![0xE9, 0x00, 0x00, 0x00, 0x00]);
        assert!(result.relocation.is_some());
        let r = result.relocation.unwrap();
        assert_eq!(&*r.label, "target");
        assert_eq!(r.kind, RelocKind::X86Relative);
        assert_eq!(r.size, 4);
    }

    #[test]
    fn test_jmp_reg() {
        // jmp rax → FF /4
        let bytes = encode("jmp", vec![Register(Rax)]);
        assert_eq!(bytes, vec![0xFF, 0xE0]);
    }

    #[test]
    fn test_call_reg() {
        // call rax → FF /2
        let bytes = encode("call", vec![Register(Rax)]);
        assert_eq!(bytes, vec![0xFF, 0xD0]);
    }

    #[test]
    fn test_call_r12() {
        // call r12 → REX.B FF /2
        let bytes = encode("call", vec![Register(R12)]);
        assert_eq!(bytes, vec![0x41, 0xFF, 0xD4]);
    }

    // === Jcc ===

    #[test]
    fn test_je_label() {
        // je label → 0F 84 rel32 with a pending relocation
        let instr = make_instr("je", vec![Label(String::from("target"))]);
        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
        assert_eq!(result.bytes[0..2], [0x0F, 0x84]);
        assert!(result.relocation.is_some());
    }

    #[test]
    fn test_jne_label() {
        // jne label → 0F 85 rel32
        let instr = make_instr("jne", vec![Label(String::from("target"))]);
        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
        assert_eq!(result.bytes[0..2], [0x0F, 0x85]);
    }

    // === SETcc ===

    #[test]
    fn test_sete_al() {
        // sete al → 0F 94 /0
        let bytes = encode("sete", vec![Register(Al)]);
        assert_eq!(bytes, vec![0x0F, 0x94, 0xC0]);
    }

    // === CMOVcc ===

    #[test]
    fn test_cmove_rax_rbx() {
        // cmove rax, rbx → REX.W 0F 44 /r
        let bytes = encode("cmove", vec![Register(Rax), Register(Rbx)]);
        assert_eq!(bytes, vec![0x48, 0x0F, 0x44, 0xC3]);
    }

    // === MOVZX / MOVSX ===

    #[test]
    fn test_movzx_eax_al() {
        // movzx eax, al → 0F B6 /r
        let bytes = encode("movzx", vec![Register(Eax), Register(Al)]);
        assert_eq!(bytes, vec![0x0F, 0xB6, 0xC0]);
    }

    #[test]
    fn test_movsx_rax_eax() {
        // movsx rax, eax is really movsxd → REX.W 63 /r
        let bytes = encode("movsx", vec![Register(Rax), Register(Eax)]);
        assert_eq!(bytes, vec![0x48, 0x63, 0xC0]);
    }
5743
    // === LEA ===

    #[test]
    fn test_lea_rax_rbx_rcx_8() {
        // lea rax, [rbx + rcx*8] → REX.W 8D /r with a SIB byte
        let mem = MemoryOperand {
            base: Some(Rbx),
            index: Some(Rcx),
            scale: 8,
            disp: 0,
            ..Default::default()
        };
        let bytes = encode("lea", vec![Register(Rax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0x48, 0x8D, 0x04, 0xCB]);
    }

    // === Prefix ===

    #[test]
    fn test_lock_prefix() {
        // lock add [rax], 1 — the LOCK byte must precede the opcode.
        let mem = MemoryOperand {
            base: Some(Rax),
            ..Default::default()
        };
        let bytes = encode_with_prefix(
            "add",
            vec![Memory(Box::new(mem)), Immediate(1)],
            Prefix::Lock,
        );
        assert_eq!(bytes[0], 0xF0); // LOCK prefix
    }

    // === IMUL ===

    #[test]
    fn test_imul_r_r() {
        // imul rax, rbx → REX.W 0F AF /r (two-operand form)
        let bytes = encode("imul", vec![Register(Rax), Register(Rbx)]);
        assert_eq!(bytes, vec![0x48, 0x0F, 0xAF, 0xC3]);
    }

    #[test]
    fn test_imul_r_r_imm8() {
        // imul rax, rbx, 10 → REX.W 6B /r ib (three-operand imm8 form)
        let bytes = encode("imul", vec![Register(Rax), Register(Rbx), Immediate(10)]);
        assert_eq!(bytes, vec![0x48, 0x6B, 0xC3, 0x0A]);
    }

    // === BSWAP ===

    #[test]
    fn test_bswap_eax() {
        // bswap eax → 0F C8+rd
        let bytes = encode("bswap", vec![Register(Eax)]);
        assert_eq!(bytes, vec![0x0F, 0xC8]);
    }

    #[test]
    fn test_bswap_rax() {
        // bswap rax → REX.W 0F C8+rd
        let bytes = encode("bswap", vec![Register(Rax)]);
        assert_eq!(bytes, vec![0x48, 0x0F, 0xC8]);
    }
5802
    // === String Instructions ===

    #[test]
    fn test_rep_movsb() {
        // rep movsb → F3 A4
        let mut instr = make_instr("movsb", vec![]);
        instr.prefixes = PrefixList::from(alloc::vec![Prefix::Rep]);
        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
        assert_eq!(result.bytes, vec![0xF3, 0xA4]);
    }

    // === Multi-byte NOP ===

    #[test]
    fn test_nop3_encoding() {
        // 3-byte NOP → 0F 1F 00 (recommended multi-byte NOP sequence)
        let instr = make_instr("nop3", vec![]);
        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
        assert_eq!(result.bytes, vec![0x0F, 0x1F, 0x00]);
    }

    // === RSP/RBP edge cases (SIB byte required) ===

    #[test]
    fn test_mov_rax_mem_rsp() {
        // [rsp] requires SIB byte
        let mem = MemoryOperand {
            base: Some(Rsp),
            ..Default::default()
        };
        let bytes = encode("mov", vec![Register(Rax), Memory(Box::new(mem))]);
        // REX.W 8B 04 24 (ModRM=04h → SIB follows, SIB=24h → base=RSP, no index)
        assert_eq!(bytes, vec![0x48, 0x8B, 0x04, 0x24]);
    }

    // === CDQ / CQO ===

    #[test]
    fn test_cdq() {
        // cdq → 99 (sign-extend eax into edx:eax)
        assert_eq!(encode("cdq", vec![]), vec![0x99]);
    }

    #[test]
    fn test_cqo() {
        // cqo → REX.W 99 (sign-extend rax into rdx:rax)
        assert_eq!(encode("cqo", vec![]), vec![0x48, 0x99]);
    }

    // === 16-bit operations ===

    #[test]
    fn test_mov_ax_bx() {
        // mov ax, bx → 66 89 /r (operand-size override prefix)
        let bytes = encode("mov", vec![Register(Ax), Register(Bx)]);
        assert_eq!(bytes, vec![0x66, 0x89, 0xD8]);
    }

    #[test]
    fn test_add_ax_imm() {
        // add ax, 1 → 66 83 /0 ib
        let bytes = encode("add", vec![Register(Ax), Immediate(1)]);
        assert_eq!(bytes, vec![0x66, 0x83, 0xC0, 0x01]);
    }

    // === Arch::X86 basic smoke test ===

    #[test]
    fn test_arch_x86_nop() {
        // nop encodes identically in 32-bit mode.
        let instr = make_instr("nop", vec![]);
        let result = encode_instruction(&instr, Arch::X86);
        assert!(result.is_ok());
        assert_eq!(result.unwrap().bytes, vec![0x90]);
    }
5871
    // === x86-32 INC/DEC short forms (0x40+rd / 0x48+rd) ===
    // In 32-bit mode, bytes 0x40-0x4F are the INC/DEC short forms; in 64-bit
    // mode they are REX prefixes, so these encodings are Arch::X86-only.

    #[test]
    fn test_x86_32_inc_eax() {
        // inc eax → 0x40 (short form)
        let instr = make_instr("inc", vec![Register(Eax)]);
        let result = encode_instruction(&instr, Arch::X86).unwrap();
        assert_eq!(result.bytes, vec![0x40]);
    }

    #[test]
    fn test_x86_32_inc_ebx() {
        // inc ebx → 0x43 (0x40 + 3)
        let instr = make_instr("inc", vec![Register(Ebx)]);
        let result = encode_instruction(&instr, Arch::X86).unwrap();
        assert_eq!(result.bytes, vec![0x43]);
    }

    #[test]
    fn test_x86_32_inc_edi() {
        // inc edi → 0x47 (0x40 + 7)
        let instr = make_instr("inc", vec![Register(Edi)]);
        let result = encode_instruction(&instr, Arch::X86).unwrap();
        assert_eq!(result.bytes, vec![0x47]);
    }

    #[test]
    fn test_x86_32_dec_eax() {
        // dec eax → 0x48 (short form)
        let instr = make_instr("dec", vec![Register(Eax)]);
        let result = encode_instruction(&instr, Arch::X86).unwrap();
        assert_eq!(result.bytes, vec![0x48]);
    }

    #[test]
    fn test_x86_32_dec_esp() {
        // dec esp → 0x4C (0x48 + 4)
        let instr = make_instr("dec", vec![Register(Esp)]);
        let result = encode_instruction(&instr, Arch::X86).unwrap();
        assert_eq!(result.bytes, vec![0x4C]);
    }

    #[test]
    fn test_x86_32_inc_ax() {
        // inc ax → 66 40 (16-bit override + short form)
        let instr = make_instr("inc", vec![Register(Ax)]);
        let result = encode_instruction(&instr, Arch::X86).unwrap();
        assert_eq!(result.bytes, vec![0x66, 0x40]);
    }

    #[test]
    fn test_x86_32_dec_cx() {
        // dec cx → 66 49 (16-bit override + short form)
        let instr = make_instr("dec", vec![Register(Cx)]);
        let result = encode_instruction(&instr, Arch::X86).unwrap();
        assert_eq!(result.bytes, vec![0x66, 0x49]);
    }

    #[test]
    fn test_x86_32_inc_al_uses_modrm() {
        // inc al → FE C0 (8-bit uses ModR/M form, not short form)
        let instr = make_instr("inc", vec![Register(Al)]);
        let result = encode_instruction(&instr, Arch::X86).unwrap();
        assert_eq!(result.bytes, vec![0xFE, 0xC0]);
    }
5937
    // === push/pop 16-bit registers ===

    #[test]
    fn test_push_ax() {
        // push ax → 66 50
        let bytes = encode("push", vec![Register(Ax)]);
        assert_eq!(bytes, vec![0x66, 0x50]);
    }

    #[test]
    fn test_pop_ax() {
        // pop ax → 66 58
        let bytes = encode("pop", vec![Register(Ax)]);
        assert_eq!(bytes, vec![0x66, 0x58]);
    }

    #[test]
    fn test_push_bx() {
        // push bx → 66 53
        let bytes = encode("push", vec![Register(Bx)]);
        assert_eq!(bytes, vec![0x66, 0x53]);
    }

    #[test]
    fn test_pop_bx() {
        // pop bx → 66 5B
        let bytes = encode("pop", vec![Register(Bx)]);
        assert_eq!(bytes, vec![0x66, 0x5B]);
    }

    // === xchg 16-bit ===

    #[test]
    fn test_xchg_ax_bx_shortcut() {
        // xchg ax, bx → 66 93 (90+rd accumulator shortcut)
        let bytes = encode("xchg", vec![Register(Ax), Register(Bx)]);
        assert_eq!(bytes, vec![0x66, 0x93]);
    }

    // === movsx with memory operand ===
    // The size hint disambiguates the source width for memory operands.

    #[test]
    fn test_movsx_eax_byte_mem() {
        // movsx eax, byte [rbx] → 0F BE 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode_with_hint(
            "movsx",
            vec![Register(Eax), Memory(Box::new(mem))],
            OperandSize::Byte,
        );
        assert_eq!(bytes, vec![0x0F, 0xBE, 0x03]);
    }

    #[test]
    fn test_movsx_rax_word_mem() {
        // movsx rax, word [rbx] → 48 0F BF 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode_with_hint(
            "movsx",
            vec![Register(Rax), Memory(Box::new(mem))],
            OperandSize::Word,
        );
        assert_eq!(bytes, vec![0x48, 0x0F, 0xBF, 0x03]);
    }

    #[test]
    fn test_movsxd_rax_dword_mem() {
        // movsxd rax, dword [rbx] → 48 63 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode_with_hint(
            "movsx",
            vec![Register(Rax), Memory(Box::new(mem))],
            OperandSize::Dword,
        );
        assert_eq!(bytes, vec![0x48, 0x63, 0x03]);
    }
6021
    // === bsf/bsr with memory operand ===

    #[test]
    fn test_bsf_eax_mem() {
        // bsf eax, [rbx] → 0F BC 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode("bsf", vec![Register(Eax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0x0F, 0xBC, 0x03]);
    }

    #[test]
    fn test_bsr_rax_mem() {
        // bsr rax, [rbx] → 48 0F BD 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode("bsr", vec![Register(Rax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0x48, 0x0F, 0xBD, 0x03]);
    }

    // === popcnt/lzcnt/tzcnt with memory operand ===
    // Here F3 is a mandatory opcode prefix, not a REP prefix.

    #[test]
    fn test_popcnt_eax_mem() {
        // popcnt eax, [rbx] → F3 0F B8 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode("popcnt", vec![Register(Eax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0xF3, 0x0F, 0xB8, 0x03]);
    }

    #[test]
    fn test_lzcnt_rax_mem() {
        // lzcnt rax, [rbx] → F3 48 0F BD 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode("lzcnt", vec![Register(Rax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0xF3, 0x48, 0x0F, 0xBD, 0x03]);
    }

    #[test]
    fn test_tzcnt_eax_mem() {
        // tzcnt eax, [rbx] → F3 0F BC 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode("tzcnt", vec![Register(Eax), Memory(Box::new(mem))]);
        assert_eq!(bytes, vec![0xF3, 0x0F, 0xBC, 0x03]);
    }

    // === bt/bts/btr/btc with memory operands ===

    #[test]
    fn test_bt_mem_reg() {
        // bt [rbx], eax → 0F A3 03
        let mem = MemoryOperand {
            base: Some(Rbx),
            ..Default::default()
        };
        let bytes = encode("bt", vec![Memory(Box::new(mem)), Register(Eax)]);
        assert_eq!(bytes, vec![0x0F, 0xA3, 0x03]);
    }

    #[test]
    fn test_bts_mem_imm() {
        // bts dword [rbx], 5 → 0F BA 2B 05
        let mem = MemoryOperand {
            base: Some(Rbx),
            size: Some(OperandSize::Dword),
            ..Default::default()
        };
        let bytes = encode("bts", vec![Memory(Box::new(mem)), Immediate(5)]);
        assert_eq!(bytes, vec![0x0F, 0xBA, 0x2B, 0x05]);
    }
6105
    // === shift instructions with memory operands ===
    // Memory destinations need an explicit size on the operand itself.

    #[test]
    fn test_shl_mem_1() {
        // shl dword [rbx], 1 → D1 23
        let mem = MemoryOperand {
            base: Some(Rbx),
            size: Some(OperandSize::Dword),
            ..Default::default()
        };
        let bytes = encode("shl", vec![Memory(Box::new(mem)), Immediate(1)]);
        assert_eq!(bytes, vec![0xD1, 0x23]);
    }

    #[test]
    fn test_shr_mem_imm() {
        // shr dword [rbx], 4 → C1 2B 04
        let mem = MemoryOperand {
            base: Some(Rbx),
            size: Some(OperandSize::Dword),
            ..Default::default()
        };
        let bytes = encode("shr", vec![Memory(Box::new(mem)), Immediate(4)]);
        assert_eq!(bytes, vec![0xC1, 0x2B, 0x04]);
    }

    #[test]
    fn test_sar_mem_cl() {
        // sar dword [rbx], cl → D3 3B
        let mem = MemoryOperand {
            base: Some(Rbx),
            size: Some(OperandSize::Dword),
            ..Default::default()
        };
        let bytes = encode("sar", vec![Memory(Box::new(mem)), Register(Cl)]);
        assert_eq!(bytes, vec![0xD3, 0x3B]);
    }

    // === mov_reg_imm error on unsupported size ===

    #[test]
    fn test_mov_xmm_imm_error() {
        // There is no MOV xmm, imm form — the encoder must reject it.
        let instr = make_instr("mov", vec![Register(Xmm0), Immediate(1)]);
        let result = encode_instruction(&instr, Arch::X86_64);
        assert!(result.is_err());
    }
6152
6153    // === Expression operands ===
6154
6155    #[test]
6156    fn test_mov_rax_label_expression() {
6157        // mov rax, label+8 → REX.W B8 <imm64> with relocation addend=8
6158        let expr = Expr::Add(
6159            Box::new(Expr::Label(String::from("data"))),
6160            Box::new(Expr::Num(8)),
6161        );
6162        let instr = make_instr("mov", vec![Register(Rax), Expression(expr)]);
6163        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6164        assert_eq!(result.bytes[0], 0x48); // REX.W
6165        assert_eq!(result.bytes[1], 0xB8); // mov rax, imm64
6166        let reloc = result.relocation.unwrap();
6167        assert_eq!(&*reloc.label, "data");
6168        assert_eq!(reloc.addend, 8);
6169        assert_eq!(reloc.size, 8);
6170        assert_eq!(reloc.kind, RelocKind::Absolute);
6171    }
6172
6173    #[test]
6174    fn test_jmp_label_expression() {
6175        let expr = Expr::Sub(
6176            Box::new(Expr::Label(String::from("target"))),
6177            Box::new(Expr::Num(2)),
6178        );
6179        let instr = make_instr("jmp", vec![Expression(expr)]);
6180        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6181        assert_eq!(result.bytes[0], 0xE9); // jmp rel32
6182        let reloc = result.relocation.unwrap();
6183        assert_eq!(&*reloc.label, "target");
6184        assert_eq!(reloc.addend, -2);
6185        assert_eq!(reloc.kind, RelocKind::X86Relative);
6186        assert!(result.relax.is_some()); // Should have relaxation info
6187    }
6188
6189    #[test]
6190    fn test_call_label_expression() {
6191        let expr = Expr::Add(
6192            Box::new(Expr::Label(String::from("func"))),
6193            Box::new(Expr::Num(4)),
6194        );
6195        let instr = make_instr("call", vec![Expression(expr)]);
6196        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6197        assert_eq!(result.bytes[0], 0xE8); // call rel32
6198        let reloc = result.relocation.unwrap();
6199        assert_eq!(&*reloc.label, "func");
6200        assert_eq!(reloc.addend, 4);
6201    }
6202
6203    #[test]
6204    fn test_jcc_label_expression() {
6205        let expr = Expr::Add(
6206            Box::new(Expr::Label(String::from("dest"))),
6207            Box::new(Expr::Num(0)),
6208        );
6209        let instr = make_instr("je", vec![Expression(expr)]);
6210        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6211        assert_eq!(result.bytes[0], 0x0F); // je near
6212        let reloc = result.relocation.unwrap();
6213        assert_eq!(&*reloc.label, "dest");
6214        assert_eq!(reloc.addend, 0);
6215    }
6216
6217    #[test]
6218    fn test_push_label_expression() {
6219        let expr = Expr::Add(
6220            Box::new(Expr::Label(String::from("data"))),
6221            Box::new(Expr::Num(16)),
6222        );
6223        let instr = make_instr("push", vec![Expression(expr)]);
6224        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6225        assert_eq!(result.bytes[0], 0x68); // push imm32
6226        let reloc = result.relocation.unwrap();
6227        assert_eq!(&*reloc.label, "data");
6228        assert_eq!(reloc.addend, 16);
6229    }
6230
6231    #[test]
6232    fn test_loop_label_expression() {
6233        let expr = Expr::Sub(
6234            Box::new(Expr::Label(String::from("top"))),
6235            Box::new(Expr::Num(1)),
6236        );
6237        let instr = make_instr("loop", vec![Expression(expr)]);
6238        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6239        // Long form: E2 02 EB 05 E9 [rel32]
6240        assert_eq!(result.bytes[0], 0xE2); // loop
6241        let reloc = result.relocation.unwrap();
6242        assert_eq!(&*reloc.label, "top");
6243        assert_eq!(reloc.addend, -1);
6244        assert_eq!(reloc.size, 4); // rel32 in long form
6245                                   // Relaxation short form provided
6246        assert!(result.relax.is_some());
6247        let ri = result.relax.unwrap();
6248        assert_eq!(ri.short_bytes[0], 0xE2);
6249        assert_eq!(ri.short_bytes.len(), 2);
6250    }
6251
6252    // === extract_label helper ===
6253
6254    #[test]
6255    fn test_extract_label_plain() {
6256        let op = Label(String::from("foo"));
6257        assert_eq!(extract_label(&op), Some(("foo", 0)));
6258    }
6259
6260    #[test]
6261    fn test_extract_label_expression() {
6262        let expr = Expr::Add(
6263            Box::new(Expr::Label(String::from("bar"))),
6264            Box::new(Expr::Num(10)),
6265        );
6266        assert_eq!(extract_label(&Expression(expr)), Some(("bar", 10)));
6267    }
6268
6269    #[test]
6270    fn test_extract_label_non_label() {
6271        assert_eq!(extract_label(&Immediate(42)), None);
6272        assert_eq!(extract_label(&Register(Rax)), None);
6273    }
6274
6275    // === IMUL reg, mem, imm ===
6276
6277    #[test]
6278    fn test_imul_reg_mem_imm8() {
6279        // imul eax, [rcx], 5 → 6B 01 05
6280        let mem = MemoryOperand {
6281            base: Some(Rcx),
6282            ..Default::default()
6283        };
6284        let bytes = encode(
6285            "imul",
6286            vec![Register(Eax), Memory(Box::new(mem)), Immediate(5)],
6287        );
6288        assert_eq!(bytes, vec![0x6B, 0x01, 0x05]);
6289    }
6290
6291    #[test]
6292    fn test_imul_reg_mem_imm32() {
6293        // imul rax, [rdx], 1000 → 48 69 02 E8 03 00 00
6294        let mem = MemoryOperand {
6295            base: Some(Rdx),
6296            ..Default::default()
6297        };
6298        let bytes = encode(
6299            "imul",
6300            vec![Register(Rax), Memory(Box::new(mem)), Immediate(1000)],
6301        );
6302        assert_eq!(bytes, vec![0x48, 0x69, 0x02, 0xE8, 0x03, 0x00, 0x00]);
6303    }
6304
6305    // === ret imm16 ===
6306
6307    #[test]
6308    fn test_ret_imm16() {
6309        // ret 8 → C2 08 00
6310        let bytes = encode("ret", vec![Immediate(8)]);
6311        assert_eq!(bytes, vec![0xC2, 0x08, 0x00]);
6312    }
6313
6314    #[test]
6315    fn test_ret_imm16_large() {
6316        // ret 0x1234 → C2 34 12
6317        let bytes = encode("ret", vec![Immediate(0x1234)]);
6318        assert_eq!(bytes, vec![0xC2, 0x34, 0x12]);
6319    }
6320
6321    #[test]
6322    fn test_retn_alias() {
6323        // retn = ret (near return)
6324        assert_eq!(encode("retn", vec![]), vec![0xC3]);
6325        assert_eq!(encode("retn", vec![Immediate(4)]), vec![0xC2, 0x04, 0x00]);
6326    }
6327
6328    // === retf / lret (far return) ===
6329
6330    #[test]
6331    fn test_retf() {
6332        assert_eq!(encode("retf", vec![]), vec![0xCB]);
6333    }
6334
6335    #[test]
6336    fn test_retf_imm16() {
6337        // retf 4 → CA 04 00
6338        let bytes = encode("retf", vec![Immediate(4)]);
6339        assert_eq!(bytes, vec![0xCA, 0x04, 0x00]);
6340    }
6341
6342    #[test]
6343    fn test_lret_alias() {
6344        assert_eq!(encode("lret", vec![]), vec![0xCB]);
6345        assert_eq!(encode("lret", vec![Immediate(8)]), vec![0xCA, 0x08, 0x00]);
6346    }
6347
6348    // === movabs alias ===
6349
6350    #[test]
6351    fn test_movabs_alias() {
6352        // movabs rax, 0x12345678 should work like mov rax, 0x12345678
6353        let bytes_mov = encode("mov", vec![Register(Rax), Immediate(0x12345678)]);
6354        let bytes_movabs = encode("movabs", vec![Register(Rax), Immediate(0x12345678)]);
6355        assert_eq!(bytes_mov, bytes_movabs);
6356    }
6357
6358    #[test]
6359    fn test_movabs_imm64() {
6360        // movabs rax, 0x0102030405060708
6361        let bytes = encode("movabs", vec![Register(Rax), Immediate(0x0102030405060708)]);
6362        assert_eq!(
6363            bytes,
6364            vec![0x48, 0xB8, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01]
6365        );
6366    }
6367
6368    // === REX + high-byte conflict detection ===
6369
6370    #[test]
6371    fn test_high_byte_rex_conflict_rejected() {
6372        // mov ah, sil should fail: AH is incompatible with REX (needed for SIL)
6373        let instr = make_instr("mov", vec![Register(Ah), Register(Sil)]);
6374        let result = encode_instruction(&instr, Arch::X86_64);
6375        assert!(result.is_err());
6376        let err = result.unwrap_err();
6377        match err {
6378            AsmError::InvalidOperands { detail, .. } => {
6379                assert!(detail.contains("high-byte"));
6380            }
6381            other => panic!("expected InvalidOperands, got {:?}", other),
6382        }
6383    }
6384
6385    #[test]
6386    fn test_high_byte_extended_reg_conflict_rejected() {
6387        // add ah, r8b should fail: AH + extended register
6388        let instr = make_instr("add", vec![Register(Ah), Register(R8b)]);
6389        let result = encode_instruction(&instr, Arch::X86_64);
6390        assert!(result.is_err());
6391    }
6392
6393    #[test]
6394    fn test_high_byte_without_rex_ok() {
6395        // mov ah, al — both legacy 8-bit, no REX needed → should work
6396        // AH=code 4, AL=code 0, opcode 0x88 r/m,r: modrm(11, 0, 4) = 0xC4
6397        let bytes = encode("mov", vec![Register(Ah), Register(Al)]);
6398        assert_eq!(bytes, vec![0x88, 0xC4]);
6399    }
6400
6401    #[test]
6402    fn test_high_byte_pair_ok() {
6403        // xor ah, ch — two high-byte regs, no REX needed → should work
6404        // AH=code 4, CH=code 5, opcode 0x30 r/m,r: modrm(11, 5, 4) = 0xEC
6405        let bytes = encode("xor", vec![Register(Ah), Register(Ch)]);
6406        assert_eq!(bytes, vec![0x30, 0xEC]);
6407    }
6408
6409    // === LOCK prefix validation ===
6410
6411    #[test]
6412    fn test_lock_valid_memory_dest() {
6413        // lock add dword ptr [rax], 1 — valid (memory destination)
6414        let mem = MemoryOperand {
6415            base: Some(Rax),
6416            ..Default::default()
6417        };
6418        let bytes = encode_with_prefix(
6419            "add",
6420            vec![Memory(Box::new(mem)), Immediate(1)],
6421            Prefix::Lock,
6422        );
6423        assert_eq!(bytes[0], 0xF0);
6424    }
6425
6426    #[test]
6427    fn test_lock_invalid_reg_dest() {
6428        // lock add eax, ebx — invalid (register destination)
6429        let mut instr = make_instr("add", vec![Register(Eax), Register(Ebx)]);
6430        instr.prefixes = PrefixList::from(alloc::vec![Prefix::Lock]);
6431        let result = encode_instruction(&instr, Arch::X86_64);
6432        assert!(result.is_err());
6433        match result.unwrap_err() {
6434            AsmError::InvalidOperands { detail, .. } => {
6435                assert!(detail.contains("LOCK"));
6436            }
6437            other => panic!("expected InvalidOperands for LOCK, got {:?}", other),
6438        }
6439    }
6440
6441    #[test]
6442    fn test_lock_invalid_imm_dest() {
6443        // lock xchg eax, ecx — invalid (register destination, even though xchg is lockable)
6444        let mut instr = make_instr("xchg", vec![Register(Eax), Register(Ecx)]);
6445        instr.prefixes = PrefixList::from(alloc::vec![Prefix::Lock]);
6446        let result = encode_instruction(&instr, Arch::X86_64);
6447        assert!(result.is_err());
6448    }
6449
6450    // === Push/Pop segment registers ===
6451
6452    #[test]
6453    fn test_push_fs() {
6454        // push fs → 0F A0
6455        assert_eq!(encode("push", vec![Register(Fs)]), vec![0x0F, 0xA0]);
6456    }
6457
6458    #[test]
6459    fn test_push_gs() {
6460        // push gs → 0F A8
6461        assert_eq!(encode("push", vec![Register(Gs)]), vec![0x0F, 0xA8]);
6462    }
6463
6464    #[test]
6465    fn test_pop_fs() {
6466        // pop fs → 0F A1
6467        assert_eq!(encode("pop", vec![Register(Fs)]), vec![0x0F, 0xA1]);
6468    }
6469
6470    #[test]
6471    fn test_pop_gs() {
6472        // pop gs → 0F A9
6473        assert_eq!(encode("pop", vec![Register(Gs)]), vec![0x0F, 0xA9]);
6474    }
6475
6476    // === xchg eax,eax → NOP (0x90 single-byte) ===
6477
6478    #[test]
6479    fn test_xchg_eax_eax_is_nop() {
6480        // xchg eax, eax → 90 (the canonical NOP encoding)
6481        let bytes = encode("xchg", vec![Register(Eax), Register(Eax)]);
6482        assert_eq!(bytes, vec![0x90]);
6483    }
6484
6485    #[test]
6486    fn test_xchg_rax_rax() {
6487        // xchg rax, rax → 48 90 (REX.W + NOP form)
6488        let bytes = encode("xchg", vec![Register(Rax), Register(Rax)]);
6489        assert_eq!(bytes, vec![0x48, 0x90]);
6490    }
6491
6492    #[test]
6493    fn test_xchg_ax_ax() {
6494        // xchg ax, ax → 66 90 (16-bit xchg, operand-size prefix + NOP form)
6495        let bytes = encode("xchg", vec![Register(Ax), Register(Ax)]);
6496        assert_eq!(bytes, vec![0x66, 0x90]);
6497    }
6498
6499    // === High-byte register encoding correctness ===
6500
6501    #[test]
6502    fn test_mov_ah_imm8() {
6503        // mov ah, 0x42 → B4 42 (B0+4=B4 for AH)
6504        let bytes = encode("mov", vec![Register(Ah), Immediate(0x42)]);
6505        assert_eq!(bytes, vec![0xB4, 0x42]);
6506    }
6507
6508    #[test]
6509    fn test_mov_ch_imm8() {
6510        // mov ch, 0x11 → B5 11 (B0+5=B5 for CH)
6511        let bytes = encode("mov", vec![Register(Ch), Immediate(0x11)]);
6512        assert_eq!(bytes, vec![0xB5, 0x11]);
6513    }
6514
6515    #[test]
6516    fn test_mov_dh_imm8() {
6517        // mov dh, 0x22 → B6 22 (B0+6=B6 for DH)
6518        let bytes = encode("mov", vec![Register(Dh), Immediate(0x22)]);
6519        assert_eq!(bytes, vec![0xB6, 0x22]);
6520    }
6521
6522    #[test]
6523    fn test_mov_bh_imm8() {
6524        // mov bh, 0x33 → B7 33 (B0+7=B7 for BH)
6525        let bytes = encode("mov", vec![Register(Bh), Immediate(0x33)]);
6526        assert_eq!(bytes, vec![0xB7, 0x33]);
6527    }
6528
6529    // === Shift memory operand with size_hint ===
6530
6531    #[test]
6532    fn test_shl_byte_ptr_mem_1() {
6533        // shl byte ptr [rbx], 1 → D0 /4 with byte operand
6534        let mem = MemoryOperand {
6535            base: Some(Rbx),
6536            ..Default::default()
6537        };
6538        let bytes = encode_with_hint(
6539            "shl",
6540            vec![Memory(Box::new(mem)), Immediate(1)],
6541            OperandSize::Byte,
6542        );
6543        // Should encode as D0 23 (D0=shift byte by 1, ModRM /4 with [rbx])
6544        assert_eq!(bytes[0], 0xD0); // byte opcode, not D1 (dword)
6545    }
6546
6547    #[test]
6548    fn test_shr_qword_ptr_mem_cl() {
6549        // shr qword ptr [rax], cl → REX.W D3 /5
6550        let mem = MemoryOperand {
6551            base: Some(Rax),
6552            ..Default::default()
6553        };
6554        let bytes = encode_with_hint(
6555            "shr",
6556            vec![Memory(Box::new(mem)), Register(Cl)],
6557            OperandSize::Qword,
6558        );
6559        assert_eq!(bytes[0], 0x48); // REX.W
6560        assert_eq!(bytes[1], 0xD3); // qword shift by cl
6561    }
6562
6563    #[test]
6564    fn test_shl_word_ptr_mem_imm() {
6565        // shl word ptr [rcx], 4 → 66 C1 /4 imm8
6566        let mem = MemoryOperand {
6567            base: Some(Rcx),
6568            ..Default::default()
6569        };
6570        let bytes = encode_with_hint(
6571            "shl",
6572            vec![Memory(Box::new(mem)), Immediate(4)],
6573            OperandSize::Word,
6574        );
6575        assert_eq!(bytes[0], 0x66); // 16-bit prefix
6576        assert_eq!(bytes[1], 0xC1); // word shift by imm8
6577    }
6578
6579    // === Push/Pop validation ===
6580
6581    #[test]
6582    fn test_push_eax_rejected() {
6583        // push eax is invalid in 64-bit mode
6584        let instr = make_instr("push", vec![Register(Eax)]);
6585        let result = encode_instruction(&instr, Arch::X86_64);
6586        assert!(result.is_err());
6587    }
6588
6589    #[test]
6590    fn test_pop_eax_rejected() {
6591        // pop eax is invalid in 64-bit mode
6592        let instr = make_instr("pop", vec![Register(Eax)]);
6593        let result = encode_instruction(&instr, Arch::X86_64);
6594        assert!(result.is_err());
6595    }
6596
6597    #[test]
6598    fn test_push_al_rejected() {
6599        // push al is invalid
6600        let instr = make_instr("push", vec![Register(Al)]);
6601        let result = encode_instruction(&instr, Arch::X86_64);
6602        assert!(result.is_err());
6603    }
6604
6605    #[test]
6606    fn test_push_cs_rejected() {
6607        // push cs invalid in 64-bit mode
6608        let instr = make_instr("push", vec![Register(Cs)]);
6609        let result = encode_instruction(&instr, Arch::X86_64);
6610        assert!(result.is_err());
6611    }
6612
6613    #[test]
6614    fn test_pop_ds_rejected() {
6615        // pop ds invalid in 64-bit mode
6616        let instr = make_instr("pop", vec![Register(Ds)]);
6617        let result = encode_instruction(&instr, Arch::X86_64);
6618        assert!(result.is_err());
6619    }
6620
6621    // === CMOVcc 8-bit rejection ===
6622
6623    #[test]
6624    fn test_cmove_8bit_rejected() {
6625        let instr = make_instr("cmove", vec![Register(Al), Register(Bl)]);
6626        let result = encode_instruction(&instr, Arch::X86_64);
6627        assert!(result.is_err());
6628    }
6629
6630    // === disp_label relocation propagation ===
6631
6632    #[test]
6633    fn test_add_rax_mem_label_reloc() {
6634        // add rax, [my_data] — should produce relocation
6635        let mem = MemoryOperand {
6636            base: Some(Rip),
6637            disp_label: Some(alloc::string::String::from("my_data")),
6638            addr_mode: AddrMode::Offset,
6639            ..Default::default()
6640        };
6641        let instr = make_instr("add", vec![Register(Rax), Memory(Box::new(mem))]);
6642        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6643        assert!(
6644            result.relocation.is_some(),
6645            "expected relocation for add rax, [my_data]"
6646        );
6647        let reloc = result.relocation.unwrap();
6648        assert_eq!(&*reloc.label, "my_data");
6649        assert_eq!(reloc.kind, RelocKind::X86Relative);
6650    }
6651
6652    #[test]
6653    fn test_cmp_mem_label_imm_reloc() {
6654        // cmp dword ptr [my_data], 0 — should produce relocation via centralized scan
6655        let mem = MemoryOperand {
6656            base: Some(Rip),
6657            disp_label: Some(alloc::string::String::from("counter")),
6658            addr_mode: AddrMode::Offset,
6659            ..Default::default()
6660        };
6661        let mut instr = make_instr("cmp", vec![Memory(Box::new(mem)), Immediate(0)]);
6662        instr.size_hint = Some(OperandSize::Dword);
6663        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6664        assert!(
6665            result.relocation.is_some(),
6666            "expected relocation for cmp [counter], 0"
6667        );
6668        let reloc = result.relocation.unwrap();
6669        assert_eq!(&*reloc.label, "counter");
6670    }
6671
6672    #[test]
6673    fn test_mov_mem_label_reg_reloc() {
6674        // mov [my_var], rax — should produce relocation
6675        let mem = MemoryOperand {
6676            base: Some(Rip),
6677            disp_label: Some(alloc::string::String::from("my_var")),
6678            addr_mode: AddrMode::Offset,
6679            ..Default::default()
6680        };
6681        let instr = make_instr("mov", vec![Memory(Box::new(mem)), Register(Rax)]);
6682        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6683        assert!(
6684            result.relocation.is_some(),
6685            "expected relocation for mov [my_var], rax"
6686        );
6687        let reloc = result.relocation.unwrap();
6688        assert_eq!(&*reloc.label, "my_var");
6689        assert_eq!(reloc.kind, RelocKind::X86Relative);
6690    }
6691
6692    #[test]
6693    fn test_mov_mem_label_imm_reloc() {
6694        // mov dword ptr [flag], 1 — should produce relocation
6695        let mem = MemoryOperand {
6696            base: Some(Rip),
6697            disp_label: Some(alloc::string::String::from("flag")),
6698            addr_mode: AddrMode::Offset,
6699            ..Default::default()
6700        };
6701        let mut instr = make_instr("mov", vec![Memory(Box::new(mem)), Immediate(1)]);
6702        instr.size_hint = Some(OperandSize::Dword);
6703        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6704        assert!(
6705            result.relocation.is_some(),
6706            "expected relocation for mov [flag], 1"
6707        );
6708        let reloc = result.relocation.unwrap();
6709        assert_eq!(&*reloc.label, "flag");
6710        assert_eq!(reloc.kind, RelocKind::X86Relative);
6711    }
6712
6713    #[test]
6714    fn test_test_mem_label_reloc() {
6715        // test dword ptr [status], 1 — should produce relocation via centralized scan
6716        let mem = MemoryOperand {
6717            base: Some(Rip),
6718            disp_label: Some(alloc::string::String::from("status")),
6719            addr_mode: AddrMode::Offset,
6720            ..Default::default()
6721        };
6722        let mut instr = make_instr("test", vec![Memory(Box::new(mem)), Immediate(1)]);
6723        instr.size_hint = Some(OperandSize::Dword);
6724        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6725        assert!(
6726            result.relocation.is_some(),
6727            "expected relocation for test [status], 1"
6728        );
6729    }
6730
6731    // ── P0-1: Segment override prefix must come BEFORE REX/opcode ──
6732
6733    #[test]
6734    fn test_segment_override_fs_prefix_position() {
6735        // mov rax, fs:[rbx] → 64 48 8B 03
6736        let mem = MemoryOperand {
6737            base: Some(Rbx),
6738            segment: Some(Fs),
6739            ..Default::default()
6740        };
6741        let bytes = encode("mov", vec![Register(Rax), Memory(Box::new(mem))]);
6742        // First byte must be 0x64 (FS override), not the REX prefix
6743        assert_eq!(bytes[0], 0x64, "FS segment override must be first byte");
6744        assert_eq!(bytes[1], 0x48, "REX.W must follow segment override");
6745        assert_eq!(bytes[2], 0x8B, "opcode must follow REX");
6746    }
6747
6748    #[test]
6749    fn test_segment_override_gs_prefix_position() {
6750        // mov eax, gs:[rdx] → 65 8B 02
6751        let mem = MemoryOperand {
6752            base: Some(Rdx),
6753            segment: Some(Gs),
6754            ..Default::default()
6755        };
6756        let bytes = encode("mov", vec![Register(Eax), Memory(Box::new(mem))]);
6757        assert_eq!(bytes[0], 0x65, "GS segment override must be first byte");
6758        assert_eq!(bytes[1], 0x8B, "opcode must follow segment override");
6759    }
6760
6761    #[test]
6762    fn test_segment_override_with_extended_reg() {
6763        // add rax, fs:[r12] → 64 49 03 04 24 (FS + REX.WB + opcode + SIB)
6764        let mem = MemoryOperand {
6765            base: Some(R12),
6766            segment: Some(Fs),
6767            ..Default::default()
6768        };
6769        let bytes = encode("add", vec![Register(Rax), Memory(Box::new(mem))]);
6770        assert_eq!(bytes[0], 0x64, "FS must precede REX");
6771        assert_eq!(bytes[1] & 0xF0, 0x40, "REX must follow segment override");
6772    }
6773
6774    // ── P0-2: SIB index-only addressing ──
6775
6776    #[test]
6777    fn test_sib_index_only_disp0() {
6778        // mov rax, [rsi*4] → 48 8B 04 B5 00 00 00 00
6779        // Must have mod=00, base=101, and 4 bytes of disp32=0
6780        let mem = MemoryOperand {
6781            index: Some(Rsi),
6782            scale: 4,
6783            ..Default::default()
6784        };
6785        let bytes = encode("mov", vec![Register(Rax), Memory(Box::new(mem))]);
6786        assert_eq!(bytes.len(), 8, "SIB index-only must include 4-byte disp32");
6787        assert_eq!(bytes[0], 0x48); // REX.W
6788        assert_eq!(bytes[1], 0x8B); // MOV
6789        assert_eq!(bytes[2] & 0xC7, 0x04); // mod=00, rm=100 (SIB)
6790                                           // SIB: scale=4 (log2=10), index=RSI (110), base=101 (no base)
6791        assert_eq!(bytes[3], 0xB5); // 10_110_101
6792        assert_eq!(&bytes[4..8], &[0, 0, 0, 0]); // disp32=0
6793    }
6794
6795    #[test]
6796    fn test_sib_index_only_with_disp() {
6797        // lea rax, [rdi*8+16] → 48 8D 04 FD 10 00 00 00
6798        // NOT [rbp + rdi*8 + 16]
6799        let mem = MemoryOperand {
6800            index: Some(Rdi),
6801            scale: 8,
6802            disp: 16,
6803            ..Default::default()
6804        };
6805        let bytes = encode("lea", vec![Register(Rax), Memory(Box::new(mem))]);
6806        assert_eq!(bytes.len(), 8, "SIB index-only with disp must use disp32");
6807        assert_eq!(bytes[2] & 0xC0, 0x00, "mod must be 00 (disp32, no base)");
6808        // disp32 should be 16
6809        assert_eq!(&bytes[4..8], &(16i32).to_le_bytes());
6810    }
6811
6812    #[test]
6813    fn test_sib_index_only_extended_index() {
6814        // mov eax, [r9*2] → 42 8B 04 4D 00 00 00 00
6815        let mem = MemoryOperand {
6816            index: Some(R9),
6817            scale: 2,
6818            ..Default::default()
6819        };
6820        let bytes = encode("mov", vec![Register(Eax), Memory(Box::new(mem))]);
6821        assert_eq!(bytes.len(), 8, "SIB index-only with extended index");
6822        assert_eq!(bytes[0] & 0x42, 0x42, "REX.X must be set for R9 index");
6823    }
6824
6825    // ── P2: 8-bit operand validation ──
6826
6827    #[test]
6828    fn test_bt_rejects_8bit() {
6829        let instr = make_instr("bt", vec![Register(Al), Immediate(1)]);
6830        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
6831    }
6832
6833    #[test]
6834    fn test_bsf_rejects_8bit() {
6835        let instr = make_instr("bsf", vec![Register(Al), Register(Cl)]);
6836        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
6837    }
6838
6839    #[test]
6840    fn test_bsr_rejects_8bit() {
6841        let instr = make_instr("bsr", vec![Register(Al), Register(Cl)]);
6842        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
6843    }
6844
6845    #[test]
6846    fn test_popcnt_rejects_8bit() {
6847        let instr = make_instr("popcnt", vec![Register(Al), Register(Cl)]);
6848        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
6849    }
6850
6851    #[test]
6852    fn test_lzcnt_rejects_8bit() {
6853        let instr = make_instr("lzcnt", vec![Register(Al), Register(Cl)]);
6854        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
6855    }
6856
6857    #[test]
6858    fn test_tzcnt_rejects_8bit() {
6859        let instr = make_instr("tzcnt", vec![Register(Al), Register(Cl)]);
6860        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
6861    }
6862
6863    #[test]
6864    fn test_bswap_rejects_8bit() {
6865        let instr = make_instr("bswap", vec![Register(Al)]);
6866        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
6867    }
6868
6869    #[test]
6870    fn test_bswap_rejects_16bit() {
6871        let instr = make_instr("bswap", vec![Register(Ax)]);
6872        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
6873    }
6874
6875    // ── P3-1: No redundant REX.W on push/pop/jmp/call [mem] ──
6876
6877    #[test]
6878    fn test_push_mem_no_redundant_rex() {
6879        // push [rdi] → FF 37 (not 48 FF 37)
6880        let mem = MemoryOperand {
6881            base: Some(Rdi),
6882            ..Default::default()
6883        };
6884        let bytes = encode("push", vec![Memory(Box::new(mem))]);
6885        assert_eq!(bytes, &[0xFF, 0x37], "push [rdi] should not have REX.W");
6886    }
6887
6888    #[test]
6889    fn test_pop_mem_no_redundant_rex() {
6890        // pop [rdi] → 8F 07 (not 48 8F 07)
6891        let mem = MemoryOperand {
6892            base: Some(Rdi),
6893            ..Default::default()
6894        };
6895        let bytes = encode("pop", vec![Memory(Box::new(mem))]);
6896        assert_eq!(bytes, &[0x8F, 0x07], "pop [rdi] should not have REX.W");
6897    }
6898
6899    #[test]
6900    fn test_jmp_mem_no_redundant_rex() {
6901        // jmp [rdi] → FF 27 (not 48 FF 27)
6902        let mem = MemoryOperand {
6903            base: Some(Rdi),
6904            ..Default::default()
6905        };
6906        let bytes = encode("jmp", vec![Memory(Box::new(mem))]);
6907        assert_eq!(bytes, &[0xFF, 0x27], "jmp [rdi] should not have REX.W");
6908    }
6909
6910    #[test]
6911    fn test_call_mem_no_redundant_rex() {
6912        // call [rdi] → FF 17 (not 48 FF 17)
6913        let mem = MemoryOperand {
6914            base: Some(Rdi),
6915            ..Default::default()
6916        };
6917        let bytes = encode("call", vec![Memory(Box::new(mem))]);
6918        assert_eq!(bytes, &[0xFF, 0x17], "call [rdi] should not have REX.W");
6919    }
6920
6921    #[test]
6922    fn test_push_mem_extended_needs_rex() {
6923        // push [r15] → 41 FF 37 (REX.B for r15, but not REX.W)
6924        let mem = MemoryOperand {
6925            base: Some(R15),
6926            ..Default::default()
6927        };
6928        let bytes = encode("push", vec![Memory(Box::new(mem))]);
6929        assert_eq!(bytes[0], 0x41, "push [r15] needs REX.B");
6930        assert_eq!(bytes[1], 0xFF);
6931    }
6932
6933    // ── P3-2: xchg short form both directions ──
6934
6935    #[test]
6936    fn test_xchg_rbx_rax_short_form() {
6937        // xchg rbx, rax → 48 93 (same as xchg rax, rbx)
6938        let bytes = encode("xchg", vec![Register(Rbx), Register(Rax)]);
6939        assert_eq!(bytes, &[0x48, 0x93], "xchg rbx, rax should use short form");
6940    }
6941
6942    #[test]
6943    fn test_xchg_ecx_eax_short_form() {
6944        // xchg ecx, eax → 91 (same as xchg eax, ecx)
6945        let bytes = encode("xchg", vec![Register(Ecx), Register(Eax)]);
6946        assert_eq!(bytes, &[0x91], "xchg ecx, eax should use short form");
6947    }
6948
6949    // ── P1-2: ALU mem,imm reloc uses explicit disp offset (not heuristic) ──
6950
6951    #[test]
6952    fn test_alu_mem_imm_reloc_explicit() {
6953        // add dword ptr [label], 5 — relocation must point to displacement, not imm
6954        let mem = MemoryOperand {
6955            base: Some(Rip),
6956            disp_label: Some(alloc::string::String::from("data")),
6957            addr_mode: AddrMode::Offset,
6958            ..Default::default()
6959        };
6960        let mut instr = make_instr("add", vec![Memory(Box::new(mem)), Immediate(5)]);
6961        instr.size_hint = Some(OperandSize::Dword);
6962        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6963        assert!(
6964            result.relocation.is_some(),
6965            "expected relocation for add [label], imm"
6966        );
6967        // The relocation offset should point inside the instruction, not at the trailing imm8
6968        let roff = result.relocation.as_ref().unwrap().offset;
6969        // For add [rip+disp32], imm8: opcode(1) + modrm(1) → disp32 at offset 2
6970        assert_eq!(roff, 2, "reloc should point at displacement, not immediate");
6971    }
6972
6973    // ── P1-3: No double segment override from builder API ──
6974
6975    #[test]
6976    fn test_no_double_segment_override() {
6977        // Builder constructs: prefixes=[SegFs] AND mem.segment=Some(Fs)
6978        let mem = MemoryOperand {
6979            base: Some(Rbx),
6980            segment: Some(Fs),
6981            ..Default::default()
6982        };
6983        let mut instr = make_instr("mov", vec![Register(Rax), Memory(Box::new(mem))]);
6984        instr.prefixes.push(Prefix::SegFs);
6985        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
6986        // Should only have ONE 0x64 byte, not two
6987        let count_64 = result.bytes.iter().filter(|&&b| b == 0x64).count();
6988        assert_eq!(count_64, 1, "should not emit double FS segment override");
6989    }
6990
6991    #[test]
6992    fn test_rip_relative_trailing_bytes_mov_mem_imm() {
6993        // mov dword ptr [rip+label], 42  →  C7 05 [disp32] [imm32]
6994        // The reloc should have trailing_bytes = 4 (the imm32 after disp32)
6995        let mem = MemoryOperand {
6996            base: Some(crate::ir::Register::Rip),
6997            index: None,
6998            scale: 1,
6999            disp: 0,
7000            size: Some(OperandSize::Dword),
7001            segment: None,
7002            disp_label: Some(String::from("data")),
7003            addr_mode: AddrMode::Offset,
7004            index_subtract: false,
7005        };
7006        let instr = make_instr_with_hint(
7007            "mov",
7008            vec![Memory(Box::new(mem)), Immediate(42)],
7009            Some(OperandSize::Dword),
7010        );
7011        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
7012        // Expected: C7 05 00 00 00 00 2A 00 00 00
7013        assert_eq!(result.bytes[0], 0xC7); // opcode
7014        assert_eq!(result.bytes[1], 0x05); // modrm: mod=00, /0, rm=101 (RIP)
7015        let reloc = result.relocation.as_ref().unwrap();
7016        assert_eq!(reloc.offset, 2); // disp32 starts at byte 2
7017        assert_eq!(reloc.size, 4); // 4-byte disp32
7018        assert_eq!(reloc.kind, RelocKind::X86Relative);
7019        assert_eq!(reloc.trailing_bytes, 4); // 4 bytes of imm32 follow
7020        assert_eq!(result.bytes.len(), 10); // total: opcode(1) + modrm(1) + disp32(4) + imm32(4) = 10
7021    }
7022
7023    #[test]
7024    fn test_rip_relative_trailing_bytes_mov_mem_reg() {
7025        // mov [rip+label], rax  →  48 89 05 [disp32]
7026        // The reloc should have trailing_bytes = 0 (nothing after disp32)
7027        let mem = MemoryOperand {
7028            base: Some(crate::ir::Register::Rip),
7029            index: None,
7030            scale: 1,
7031            disp: 0,
7032            size: None,
7033            segment: None,
7034            disp_label: Some(String::from("data")),
7035            addr_mode: AddrMode::Offset,
7036            index_subtract: false,
7037        };
7038        let instr = make_instr(
7039            "mov",
7040            vec![
7041                Memory(Box::new(mem)),
7042                Operand::Register(crate::ir::Register::Rax),
7043            ],
7044        );
7045        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
7046        let reloc = result.relocation.as_ref().unwrap();
7047        assert_eq!(reloc.kind, RelocKind::X86Relative);
7048        assert_eq!(reloc.trailing_bytes, 0); // disp32 is at the end of instruction
7049    }
7050
7051    #[test]
7052    fn test_rip_relative_trailing_bytes_alu_mem_imm8() {
7053        // add dword ptr [rip+label], 5  →  83 05 [disp32] 05
7054        // trailing_bytes = 1 (the imm8 after disp32)
7055        let mem = MemoryOperand {
7056            base: Some(crate::ir::Register::Rip),
7057            index: None,
7058            scale: 1,
7059            disp: 0,
7060            size: Some(OperandSize::Dword),
7061            segment: None,
7062            disp_label: Some(String::from("target")),
7063            addr_mode: AddrMode::Offset,
7064            index_subtract: false,
7065        };
7066        let instr = make_instr_with_hint(
7067            "add",
7068            vec![Memory(Box::new(mem)), Immediate(5)],
7069            Some(OperandSize::Dword),
7070        );
7071        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
7072        let reloc = result.relocation.as_ref().unwrap();
7073        assert_eq!(reloc.kind, RelocKind::X86Relative);
7074        assert_eq!(reloc.trailing_bytes, 1); // 1 byte of imm8 follows
7075    }
7076
7077    #[test]
7078    fn test_rip_relative_trailing_bytes_jmp() {
7079        // jmp label  →  E9 [disp32]
7080        // trailing_bytes = 0
7081        let instr = make_instr("jmp", vec![Label(String::from("target"))]);
7082        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
7083        let reloc = result.relocation.as_ref().unwrap();
7084        assert_eq!(reloc.kind, RelocKind::X86Relative);
7085        assert_eq!(reloc.trailing_bytes, 0);
7086    }
7087
7088    // === 8th Audit: Push imm range validation ===
7089
7090    #[test]
7091    fn test_push_imm8_short_form() {
7092        // push 0x42 → 6A 42
7093        assert_eq!(encode("push", vec![Immediate(0x42)]), vec![0x6A, 0x42]);
7094    }
7095
7096    #[test]
7097    fn test_push_imm32_full_form() {
7098        assert_eq!(
7099            encode("push", vec![Immediate(0x12345678)]),
7100            vec![0x68, 0x78, 0x56, 0x34, 0x12]
7101        );
7102    }
7103
7104    #[test]
7105    fn test_push_imm_out_of_range_rejects() {
7106        // push 0x1_0000_0000 → should error (doesn't fit imm32)
7107        let instr = make_instr("push", vec![Immediate(0x1_0000_0000)]);
7108        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
7109    }
7110
7111    // === 8th Audit: IMUL 8-bit rejection ===
7112
7113    #[test]
7114    fn test_imul_2op_rejects_8bit() {
7115        let instr = make_instr("imul", vec![Register(Al), Register(Bl)]);
7116        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
7117    }
7118
7119    #[test]
7120    fn test_imul_2op_mem_rejects_8bit() {
7121        let mem = MemoryOperand {
7122            base: Some(Rbx),
7123            ..Default::default()
7124        };
7125        let instr = make_instr("imul", vec![Register(Al), Memory(Box::new(mem))]);
7126        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
7127    }
7128
7129    #[test]
7130    fn test_imul_3op_rejects_8bit() {
7131        let instr = make_instr("imul", vec![Register(Al), Register(Bl), Immediate(5)]);
7132        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
7133    }
7134
7135    #[test]
7136    fn test_imul_3op_mem_rejects_8bit() {
7137        let mem = MemoryOperand {
7138            base: Some(Rcx),
7139            ..Default::default()
7140        };
7141        let instr = make_instr(
7142            "imul",
7143            vec![Register(Al), Memory(Box::new(mem)), Immediate(5)],
7144        );
7145        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
7146    }
7147
7148    // === 8th Audit: CMOVcc 8-bit rejection (reg, mem path) ===
7149
7150    #[test]
7151    fn test_cmovcc_reg_mem_rejects_8bit() {
7152        let mem = MemoryOperand {
7153            base: Some(Rbx),
7154            ..Default::default()
7155        };
7156        let instr = make_instr("cmove", vec![Register(Al), Memory(Box::new(mem))]);
7157        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
7158    }
7159
7160    // === 8th Audit: SETcc rejects non-8-bit registers ===
7161
7162    #[test]
7163    fn test_setcc_rejects_32bit_register() {
7164        let instr = make_instr("sete", vec![Register(Eax)]);
7165        assert!(encode_instruction(&instr, Arch::X86_64).is_err());
7166    }
7167
7168    #[test]
7169    fn test_setcc_accepts_8bit_register() {
7170        // sete al → 0F 94 C0
7171        assert_eq!(encode("sete", vec![Register(Al)]), vec![0x0F, 0x94, 0xC0]);
7172    }
7173
7174    // === 8th Audit: movzx/movsx mem.size fallback ===
7175
7176    #[test]
7177    fn test_movzx_mem_word_source_via_mem_size() {
7178        // movzx eax, word ptr [rbx] via mem.size (builder API path)
7179        let mem = MemoryOperand {
7180            base: Some(Rbx),
7181            size: Some(OperandSize::Word),
7182            ..Default::default()
7183        };
7184        // No size_hint on instruction — should use mem.size
7185        let instr = make_instr("movzx", vec![Register(Eax), Memory(Box::new(mem))]);
7186        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
7187        // movzx eax, word [rbx] → 0F B7 03  (B7 = word source, not B6 = byte)
7188        assert_eq!(result.bytes, vec![0x0F, 0xB7, 0x03]);
7189    }
7190
7191    #[test]
7192    fn test_movsx_mem_word_source_via_mem_size() {
7193        // movsx eax, word ptr [rbx] via mem.size (builder API path)
7194        let mem = MemoryOperand {
7195            base: Some(Rbx),
7196            size: Some(OperandSize::Word),
7197            ..Default::default()
7198        };
7199        let instr = make_instr("movsx", vec![Register(Eax), Memory(Box::new(mem))]);
7200        let result = encode_instruction(&instr, Arch::X86_64).unwrap();
7201        // movsx eax, word [rbx] → 0F BF 03  (BF = word source, not BE = byte)
7202        assert_eq!(result.bytes, vec![0x0F, 0xBF, 0x03]);
7203    }
7204
7205    // ─── 16-bit mode (encode_instruction_16) tests ───────────────
7206
7207    #[test]
7208    fn test_16bit_mov_ax_imm16() {
7209        // In 16-bit mode, mov ax, 0x1234 → B8 34 12 (no 0x66 prefix)
7210        // The 32-bit encoder produces 66 B8 34 12, toggle removes 0x66.
7211        let instr = make_instr("mov", vec![Register(Ax), Immediate(0x1234)]);
7212        let result = encode_instruction_16(&instr).unwrap();
7213        assert_eq!(result.bytes, vec![0xB8, 0x34, 0x12]);
7214    }
7215
7216    #[test]
7217    fn test_16bit_mov_eax_imm32() {
7218        // In 16-bit mode, mov eax, 0x12345678 → 66 B8 78 56 34 12
7219        // The 32-bit encoder produces B8 78 56 34 12, toggle adds 0x66.
7220        let instr = make_instr("mov", vec![Register(Eax), Immediate(0x1234_5678)]);
7221        let result = encode_instruction_16(&instr).unwrap();
7222        assert_eq!(result.bytes, vec![0x66, 0xB8, 0x78, 0x56, 0x34, 0x12]);
7223    }
7224
7225    #[test]
7226    fn test_16bit_xor_ax_ax() {
7227        // xor ax, ax → 31 C0 (no prefix in 16-bit mode)
7228        let instr = make_instr("xor", vec![Register(Ax), Register(Ax)]);
7229        let result = encode_instruction_16(&instr).unwrap();
7230        assert_eq!(result.bytes, vec![0x31, 0xC0]);
7231    }
7232
7233    #[test]
7234    fn test_16bit_xor_eax_eax() {
7235        // xor eax, eax → 66 31 C0 (prefix needed for 32-bit in 16-bit mode)
7236        let instr = make_instr("xor", vec![Register(Eax), Register(Eax)]);
7237        let result = encode_instruction_16(&instr).unwrap();
7238        assert_eq!(result.bytes, vec![0x66, 0x31, 0xC0]);
7239    }
7240
7241    #[test]
7242    fn test_16bit_push_ax() {
7243        // push ax → 50 (no prefix in 16-bit mode)
7244        let instr = make_instr("push", vec![Register(Ax)]);
7245        let result = encode_instruction_16(&instr).unwrap();
7246        assert_eq!(result.bytes, vec![0x50]);
7247    }
7248
7249    #[test]
7250    fn test_16bit_push_eax() {
7251        // push eax → 66 50 (prefix needed for 32-bit in 16-bit mode)
7252        let instr = make_instr("push", vec![Register(Eax)]);
7253        let result = encode_instruction_16(&instr).unwrap();
7254        assert_eq!(result.bytes, vec![0x66, 0x50]);
7255    }
7256
7257    #[test]
7258    fn test_16bit_pop_bx() {
7259        // pop bx → 5B (no prefix in 16-bit mode)
7260        let instr = make_instr("pop", vec![Register(Bx)]);
7261        let result = encode_instruction_16(&instr).unwrap();
7262        assert_eq!(result.bytes, vec![0x5B]);
7263    }
7264
7265    #[test]
7266    fn test_16bit_pop_ebx() {
7267        // pop ebx → 66 5B (prefix needed for 32-bit)
7268        let instr = make_instr("pop", vec![Register(Ebx)]);
7269        let result = encode_instruction_16(&instr).unwrap();
7270        assert_eq!(result.bytes, vec![0x66, 0x5B]);
7271    }
7272
7273    #[test]
7274    fn test_16bit_inc_cx() {
7275        // inc cx → 41 (short form, no prefix)
7276        let instr = make_instr("inc", vec![Register(Cx)]);
7277        let result = encode_instruction_16(&instr).unwrap();
7278        assert_eq!(result.bytes, vec![0x41]);
7279    }
7280
7281    #[test]
7282    fn test_16bit_inc_ecx() {
7283        // inc ecx → 66 41 (prefix needed for 32-bit)
7284        let instr = make_instr("inc", vec![Register(Ecx)]);
7285        let result = encode_instruction_16(&instr).unwrap();
7286        assert_eq!(result.bytes, vec![0x66, 0x41]);
7287    }
7288
7289    #[test]
7290    fn test_16bit_dec_dx() {
7291        // dec dx → 4A (short form, no prefix)
7292        let instr = make_instr("dec", vec![Register(Dx)]);
7293        let result = encode_instruction_16(&instr).unwrap();
7294        assert_eq!(result.bytes, vec![0x4A]);
7295    }
7296
7297    #[test]
7298    fn test_16bit_nop() {
7299        // nop → 90 (identical in all modes)
7300        let instr = make_instr("nop", vec![]);
7301        let result = encode_instruction_16(&instr).unwrap();
7302        assert_eq!(result.bytes, vec![0x90]);
7303    }
7304
7305    #[test]
7306    fn test_16bit_cli() {
7307        // cli → FA (identical in all modes)
7308        let instr = make_instr("cli", vec![]);
7309        let result = encode_instruction_16(&instr).unwrap();
7310        assert_eq!(result.bytes, vec![0xFA]);
7311    }
7312
7313    #[test]
7314    fn test_16bit_int_10h() {
7315        // int 0x10 → CD 10 (identical in all modes)
7316        let instr = make_instr("int", vec![Immediate(0x10)]);
7317        let result = encode_instruction_16(&instr).unwrap();
7318        assert_eq!(result.bytes, vec![0xCD, 0x10]);
7319    }
7320
7321    #[test]
7322    fn test_16bit_push_es() {
7323        // push es → 06 (segment push valid in 16-bit mode, no prefix toggle)
7324        let instr = make_instr("push", vec![Register(Es)]);
7325        let result = encode_instruction_16(&instr).unwrap();
7326        assert_eq!(result.bytes, vec![0x06]);
7327    }
7328
7329    #[test]
7330    fn test_16bit_push_cs() {
7331        // push cs → 0E
7332        let instr = make_instr("push", vec![Register(Cs)]);
7333        let result = encode_instruction_16(&instr).unwrap();
7334        assert_eq!(result.bytes, vec![0x0E]);
7335    }
7336
7337    #[test]
7338    fn test_16bit_pop_ds() {
7339        // pop ds → 1F
7340        let instr = make_instr("pop", vec![Register(Ds)]);
7341        let result = encode_instruction_16(&instr).unwrap();
7342        assert_eq!(result.bytes, vec![0x1F]);
7343    }
7344
7345    #[test]
7346    fn test_16bit_push_imm8() {
7347        // push 0x42 → 6A 42 (imm8 encoding, same in all modes)
7348        let instr = make_instr("push", vec![Immediate(0x42)]);
7349        let result = encode_instruction_16(&instr).unwrap();
7350        assert_eq!(result.bytes, vec![0x6A, 0x42]);
7351    }
7352
7353    #[test]
7354    fn test_16bit_add_ax_bx() {
7355        // add ax, bx → 01 D8 (no prefix in 16-bit mode)
7356        let instr = make_instr("add", vec![Register(Ax), Register(Bx)]);
7357        let result = encode_instruction_16(&instr).unwrap();
7358        assert_eq!(result.bytes, vec![0x01, 0xD8]);
7359    }
7360
7361    #[test]
7362    fn test_16bit_mov_al_imm8() {
7363        // mov al, 0x42 → B0 42 (8-bit, no operand-size prefix in any mode)
7364        let instr = make_instr("mov", vec![Register(Al), Immediate(0x42)]);
7365        let result = encode_instruction_16(&instr).unwrap();
7366        assert_eq!(result.bytes, vec![0xB0, 0x42]);
7367    }
7368
7369    #[test]
7370    fn test_16bit_rejects_64bit_register() {
7371        // 64-bit registers are invalid in 16-bit mode
7372        let instr = make_instr("mov", vec![Register(Rax), Immediate(1)]);
7373        assert!(encode_instruction_16(&instr).is_err());
7374    }
7375
7376    // ===================================================================
7377    // AVX-512 (EVEX-encoded) instruction tests
7378    // ===================================================================
7379
7380    // ── EVEX prefix helper unit tests ────────────────────────────────
7381
7382    #[test]
7383    fn test_emit_evex_basic_prefix() {
7384        // Test the raw EVEX prefix bytes for known inputs
7385        let mut buf = InstrBytes::new();
7386        // Simulate VADDPS zmm0, zmm1, zmm2:
7387        // R=false(zmm0 not ext), X=false, B=false(zmm2 not ext),
7388        // R'=false(zmm0 bit4=0), mm=1, W=false, vvvv=1(zmm1), V'=false, pp=0,
7389        // z=false, ll=2(512), b=false, aaa=0
7390        emit_evex(
7391            &mut buf, false, false, false, false, 1, false, 1, false, 0, false, 2, false, 0,
7392        );
7393        assert_eq!(buf[0], 0x62, "EVEX escape byte");
7394        // P0: ~R=1 ~X=1 ~B=1 ~R'=1 0 0 01 = 0xF1
7395        assert_eq!(buf[1], 0xF1, "P0");
7396        // P1: W=0 ~vvvv=~0001=1110 1 pp=00 = 0x74
7397        assert_eq!(buf[2], 0x74, "P1");
7398        // P2: z=0 L'L=10 b=0 ~V'=1 aaa=000 = 0x48
7399        assert_eq!(buf[3], 0x48, "P2");
7400    }
7401
7402    #[test]
7403    fn test_emit_evex_w_bit() {
7404        let mut buf = InstrBytes::new();
7405        // W=1, everything else zero-ish, mm=1
7406        emit_evex(
7407            &mut buf, false, false, false, false, 1, true, 0, false, 0, false, 2, false, 0,
7408        );
7409        // P1 should have W=1: 0x80 | ~0000<<3=0x78 | 0x04 | pp=0 = 0xFC
7410        assert_eq!(buf[2], 0xFC, "P1 with W=1");
7411    }
7412
7413    #[test]
7414    fn test_emit_evex_extended_reg() {
7415        let mut buf = InstrBytes::new();
7416        // R=true (extended), R'=true (evex extended) for zmm28 etc.
7417        emit_evex(
7418            &mut buf, true, false, false, true, 1, false, 0, false, 0, false, 2, false, 0,
7419        );
7420        // P0: ~R=0 ~X=1 ~B=1 ~R'=0 00 01 = 0x61
7421        assert_eq!(buf[1], 0x61, "P0 with R,R' extended");
7422    }
7423
7424    // ── EVEX evex_ll tests ──────────────────────────────────────────
7425
7426    #[test]
7427    fn test_evex_ll_zmm() {
7428        assert_eq!(evex_ll(Zmm0), 2);
7429        assert_eq!(evex_ll(Zmm31), 2);
7430    }
7431
7432    #[test]
7433    fn test_evex_ll_ymm() {
7434        assert_eq!(evex_ll(Ymm0), 1);
7435    }
7436
7437    #[test]
7438    fn test_evex_ll_xmm() {
7439        assert_eq!(evex_ll(Xmm0), 0);
7440    }
7441
7442    // ── EVEX full instruction encoding ──────────────────────────────
7443
7444    #[test]
7445    fn test_evex_vaddps_zmm0_zmm1_zmm2() {
7446        // VADDPS zmm0, zmm1, zmm2 → 62 F1 74 48 58 C2
7447        let bytes = encode(
7448            "vaddps",
7449            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7450        );
7451        assert_eq!(bytes, vec![0x62, 0xF1, 0x74, 0x48, 0x58, 0xC2]);
7452    }
7453
7454    #[test]
7455    fn test_evex_vaddpd_zmm0_zmm1_zmm2() {
7456        // VADDPD zmm0, zmm1, zmm2 → 62 F1 F5 48 58 C2
7457        // W=1 → P1 = 0x80 | 0x74 = 0xF4, but vvvv=1 so ~vvvv=1110,
7458        // P1 = 0x80 | (0x0E << 3) | 0x04 | 0x01 = 0x80|0x70|0x04|0x01 = 0xF5
7459        let bytes = encode(
7460            "vaddpd",
7461            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7462        );
7463        assert_eq!(bytes, vec![0x62, 0xF1, 0xF5, 0x48, 0x58, 0xC2]);
7464    }
7465
7466    #[test]
7467    fn test_evex_vsubps_zmm3_zmm4_zmm5() {
7468        // VSUBPS zmm3, zmm4, zmm5
7469        // dst=zmm3(code=3), src1=zmm4(code=4), src2=zmm5(code=5)
7470        // P0: all not ext → 0xF1
7471        // P1: W=0, vvvv=4 → ~4=~0100=1011 → 01011, 1, 00 → 0x5C...
7472        // ~vvvv = 0x0B, P1 = (0x0B << 3) | 0x04 | 0x00 = 0x58|0x04 = 0x5C
7473        // P2: z=0, L'L=10, b=0, ~V'=1, aaa=0 → 0x48
7474        // opcode=0x5C, ModRM: 0xC0|(3<<3)|5 = 0xDD
7475        let bytes = encode(
7476            "vsubps",
7477            vec![Register(Zmm3), Register(Zmm4), Register(Zmm5)],
7478        );
7479        assert_eq!(bytes, vec![0x62, 0xF1, 0x5C, 0x48, 0x5C, 0xDD]);
7480    }
7481
7482    #[test]
7483    fn test_evex_vmulps_zmm0_zmm1_zmm2() {
7484        let bytes = encode(
7485            "vmulps",
7486            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7487        );
7488        assert_eq!(bytes, vec![0x62, 0xF1, 0x74, 0x48, 0x59, 0xC2]);
7489    }
7490
7491    #[test]
7492    fn test_evex_vdivps_zmm0_zmm1_zmm2() {
7493        let bytes = encode(
7494            "vdivps",
7495            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7496        );
7497        assert_eq!(bytes, vec![0x62, 0xF1, 0x74, 0x48, 0x5E, 0xC2]);
7498    }
7499
7500    #[test]
7501    fn test_evex_vmovaps_zmm0_zmm1() {
7502        // VMOVAPS zmm0, zmm1 (2-op, vvvv=0)
7503        // P1: W=0, vvvv=0 → ~0=1111, P1 = (0x0F<<3)|0x04|0x00 = 0x78|0x04 = 0x7C
7504        // P2: z=0, L'L=10, b=0, ~V'=1, aaa=0 → 0x48
7505        // opcode=0x28, ModRM: 0xC0|(0<<3)|1 = 0xC1
7506        let bytes = encode("vmovaps", vec![Register(Zmm0), Register(Zmm1)]);
7507        assert_eq!(bytes, vec![0x62, 0xF1, 0x7C, 0x48, 0x28, 0xC1]);
7508    }
7509
7510    #[test]
7511    fn test_evex_vmovdqa32_zmm0_zmm1() {
7512        // VMOVDQA32 zmm0, zmm1 → pp=0x66(→1), W=0
7513        // P1: W=0, vvvv=0, pp=0x01 → 0x7C|0x01 = 0x7D
7514        let bytes = encode("vmovdqa32", vec![Register(Zmm0), Register(Zmm1)]);
7515        assert_eq!(bytes, vec![0x62, 0xF1, 0x7D, 0x48, 0x6F, 0xC1]);
7516    }
7517
7518    #[test]
7519    fn test_evex_vmovdqa64_zmm0_zmm1() {
7520        // VMOVDQA64 zmm0, zmm1 → pp=0x66(→1), W=1
7521        // P1: W=1, vvvv=0, pp=0x01 → 0xFC|0x01 = 0xFD
7522        let bytes = encode("vmovdqa64", vec![Register(Zmm0), Register(Zmm1)]);
7523        assert_eq!(bytes, vec![0x62, 0xF1, 0xFD, 0x48, 0x6F, 0xC1]);
7524    }
7525
7526    #[test]
7527    fn test_evex_vpternlogd_zmm0_zmm1_zmm2_imm() {
7528        // VPTERNLOGD zmm0, zmm1, zmm2, 0xFF → EVEX map3, opcode=0x25, W=0
7529        // P0: mm=3 → 0xF3
7530        // P1: W=0, vvvv=1 → 0x74|0x01 = 0x71... wait pp=0x66→1
7531        // P1: (0x0E<<3)|0x04|0x01 = 0x70|0x04|0x01 = 0x75
7532        let bytes = encode(
7533            "vpternlogd",
7534            vec![
7535                Register(Zmm0),
7536                Register(Zmm1),
7537                Register(Zmm2),
7538                Immediate(0xFF),
7539            ],
7540        );
7541        assert_eq!(bytes, vec![0x62, 0xF3, 0x75, 0x48, 0x25, 0xC2, 0xFF]);
7542    }
7543
7544    #[test]
7545    fn test_evex_vpternlogq_zmm0_zmm1_zmm2_imm() {
7546        // VPTERNLOGQ zmm0, zmm1, zmm2, 0xDB → W=1
7547        let bytes = encode(
7548            "vpternlogq",
7549            vec![
7550                Register(Zmm0),
7551                Register(Zmm1),
7552                Register(Zmm2),
7553                Immediate(0xDB),
7554            ],
7555        );
7556        assert_eq!(bytes, vec![0x62, 0xF3, 0xF5, 0x48, 0x25, 0xC2, 0xDB]);
7557    }
7558
7559    #[test]
7560    fn test_evex_vpaddd_zmm0_zmm1_zmm2() {
7561        // VPADDD zmm0, zmm1, zmm2 → map1, pp=66, W=0, opcode=0xFE
7562        let bytes = encode(
7563            "vpaddd",
7564            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7565        );
7566        assert_eq!(bytes, vec![0x62, 0xF1, 0x75, 0x48, 0xFE, 0xC2]);
7567    }
7568
7569    #[test]
7570    fn test_evex_vpaddq_zmm0_zmm1_zmm2() {
7571        // VPADDQ zmm0, zmm1, zmm2 → map1, pp=66, W=1, opcode=0xD4
7572        let bytes = encode(
7573            "vpaddq",
7574            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7575        );
7576        assert_eq!(bytes, vec![0x62, 0xF1, 0xF5, 0x48, 0xD4, 0xC2]);
7577    }
7578
7579    #[test]
7580    fn test_evex_vpxord_zmm0_zmm1_zmm2() {
7581        let bytes = encode(
7582            "vpxord",
7583            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7584        );
7585        assert_eq!(bytes, vec![0x62, 0xF1, 0x75, 0x48, 0xEF, 0xC2]);
7586    }
7587
7588    #[test]
7589    fn test_evex_vpxorq_zmm0_zmm1_zmm2() {
7590        let bytes = encode(
7591            "vpxorq",
7592            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7593        );
7594        assert_eq!(bytes, vec![0x62, 0xF1, 0xF5, 0x48, 0xEF, 0xC2]);
7595    }
7596
7597    #[test]
7598    fn test_evex_vblendmps_zmm0_zmm1_zmm2() {
7599        // VBLENDMPS zmm0, zmm1, zmm2 → map2(0F38), pp=66, W=0, opcode=0x65
7600        let bytes = encode(
7601            "vblendmps",
7602            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7603        );
7604        assert_eq!(bytes, vec![0x62, 0xF2, 0x75, 0x48, 0x65, 0xC2]);
7605    }
7606
7607    #[test]
7608    fn test_evex_vpmullq_zmm0_zmm1_zmm2() {
7609        // VPMULLQ zmm0, zmm1, zmm2 → map2(0F38), pp=66, W=1, opcode=0x40
7610        let bytes = encode(
7611            "vpmullq",
7612            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7613        );
7614        assert_eq!(bytes, vec![0x62, 0xF2, 0xF5, 0x48, 0x40, 0xC2]);
7615    }
7616
    // ── Extended registers (ZMM16+) ─────────────────────────────────
    //
    // Registers 16–31 need the EVEX-only high index bits: R' for the
    // reg field, V' for vvvv, and X reused as the high bit of rm.

    #[test]
    fn test_evex_vaddps_zmm16_zmm17_zmm18() {
        // ZMM16: index=16, base_code=0, is_ext=false(bit3=0), is_evex_ext=true(bit4=1)
        // ZMM17: index=17, base_code=1, is_ext=false(bit3=0), is_evex_ext=true(bit4=1)
        // ZMM18: index=18, base_code=2, is_ext=false(bit3=0), is_evex_ext=true(bit4=1)
        // dst=zmm16: R=ext=false, R'=evex=true
        // src1=zmm17: vvvv=1, V'=evex=true
        // src2=zmm18: B=ext=false, X=evex=true
        // P0: ~R=1(0x80), ~X=0, ~B=1(0x20), ~R'=0, mm=01 → 0xA1
        // P1: W=0, vvvv=1 → ~1=1110 → (0xE<<3)|0x04|0x00 = 0x74
        // P2: z=0, L'L=10, b=0, ~V'=0, aaa=0 → 0x40
        let bytes = encode(
            "vaddps",
            vec![Register(Zmm16), Register(Zmm17), Register(Zmm18)],
        );
        assert_eq!(bytes, vec![0x62, 0xA1, 0x74, 0x40, 0x58, 0xC2]);
    }

    #[test]
    fn test_evex_vmovaps_zmm31_zmm16() {
        // ZMM31: base_code=7, is_ext=true, is_evex_ext=true
        // ZMM16: base_code=0, is_ext=false, is_evex_ext=true
        // 2-op form: vvvv=0, V'=false
        // dst=zmm31: R=ext=true, R'=evex=true
        // src=zmm16: B=ext=false, X=evex=true
        // P0: ~R=0, ~X=0, ~B=1(0x20), ~R'=0, mm=01 → 0x21
        // P1: W=0, vvvv=0, pp=0 → (0x0F<<3)|0x04 = 0x7C
        // P2: z=0, L'L=10, b=0, ~V'=1, aaa=0 → 0x48
        // ModRM: 0xC0|(7<<3)|0 = 0xF8
        let bytes = encode("vmovaps", vec![Register(Zmm31), Register(Zmm16)]);
        assert_eq!(bytes, vec![0x62, 0x21, 0x7C, 0x48, 0x28, 0xF8]);
    }
7651
7652    // ── EVEX-only instructions (no VEX equivalent) ──────────────────
7653
7654    #[test]
7655    fn test_evex_vmovdqu8_zmm0_zmm1() {
7656        // VMOVDQU8 zmm0, zmm1 → pp=F2(→3), W=0, map1, opcode=0x6F
7657        // P1: (0x0F<<3)|0x04|0x03 = 0x7C|0x03 = 0x7F
7658        let bytes = encode("vmovdqu8", vec![Register(Zmm0), Register(Zmm1)]);
7659        assert_eq!(bytes, vec![0x62, 0xF1, 0x7F, 0x48, 0x6F, 0xC1]);
7660    }
7661
7662    #[test]
7663    fn test_evex_vmovdqu16_zmm0_zmm1() {
7664        // VMOVDQU16 zmm0, zmm1 → pp=F2(→3), W=1
7665        // P1: 0x80|(0x0F<<3)|0x04|0x03 = 0xFC|0x03 = 0xFF
7666        let bytes = encode("vmovdqu16", vec![Register(Zmm0), Register(Zmm1)]);
7667        assert_eq!(bytes, vec![0x62, 0xF1, 0xFF, 0x48, 0x6F, 0xC1]);
7668    }
7669
7670    #[test]
7671    fn test_evex_vpsravq_zmm0_zmm1_zmm2() {
7672        // VPSRAVQ zmm0, zmm1, zmm2 → map2, pp=66, W=1, opcode=0x46
7673        let bytes = encode(
7674            "vpsravq",
7675            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7676        );
7677        assert_eq!(bytes, vec![0x62, 0xF2, 0xF5, 0x48, 0x46, 0xC2]);
7678    }
7679
7680    #[test]
7681    fn test_evex_vpandd_zmm0_zmm1_zmm2() {
7682        let bytes = encode(
7683            "vpandd",
7684            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7685        );
7686        assert_eq!(bytes, vec![0x62, 0xF1, 0x75, 0x48, 0xDB, 0xC2]);
7687    }
7688
7689    #[test]
7690    fn test_evex_vpord_zmm0_zmm1_zmm2() {
7691        let bytes = encode(
7692            "vpord",
7693            vec![Register(Zmm0), Register(Zmm1), Register(Zmm2)],
7694        );
7695        assert_eq!(bytes, vec![0x62, 0xF1, 0x75, 0x48, 0xEB, 0xC2]);
7696    }
7697
7698    // ── VEX instructions still work (regression) ────────────────────
7699
7700    #[test]
7701    fn test_vex_vaddps_xmm_still_works() {
7702        // VADDPS xmm0, xmm1, xmm2 should use VEX, not EVEX
7703        let bytes = encode(
7704            "vaddps",
7705            vec![Register(Xmm0), Register(Xmm1), Register(Xmm2)],
7706        );
7707        // VEX.128.0F 58 → C5 F0 58 C2 (2-byte VEX)
7708        assert_eq!(bytes[0], 0xC5, "should be 2-byte VEX prefix");
7709    }
7710
7711    #[test]
7712    fn test_vex_vaddps_ymm_still_works() {
7713        // VADDPS ymm0, ymm1, ymm2 should use VEX, not EVEX
7714        let bytes = encode(
7715            "vaddps",
7716            vec![Register(Ymm0), Register(Ymm1), Register(Ymm2)],
7717        );
7718        // VEX.256.0F 58 → C5 F4 58 C2
7719        assert_eq!(bytes[0], 0xC5, "should be 2-byte VEX prefix for ymm");
7720    }
7721
7722    // ── Compress/expand (EVEX-only) ─────────────────────────────────
7723
7724    #[test]
7725    fn test_evex_vcompressps_zmm0_zmm1() {
7726        // VCOMPRESSPS zmm0, zmm1 → map2, pp=66, W=0, opcode=0x8A
7727        let bytes = encode("vcompressps", vec![Register(Zmm0), Register(Zmm1)]);
7728        assert_eq!(bytes, vec![0x62, 0xF2, 0x7D, 0x48, 0x8A, 0xC1]);
7729    }
7730
7731    #[test]
7732    fn test_evex_vexpandps_zmm0_zmm1() {
7733        // VEXPANDPS zmm0, zmm1 → map2, pp=66, W=0, opcode=0x88
7734        let bytes = encode("vexpandps", vec![Register(Zmm0), Register(Zmm1)]);
7735        assert_eq!(bytes, vec![0x62, 0xF2, 0x7D, 0x48, 0x88, 0xC1]);
7736    }
7737
7738    // ── EVEX shuffle / pshufd with imm ──────────────────────────────
7739
7740    #[test]
7741    fn test_evex_vpshufd_zmm0_zmm1_imm() {
7742        // VPSHUFD zmm0, zmm1, 0xE4 → map1, pp=66, W=0, opcode=0x70
7743        // 3-op imm form: dst=zmm0, src=zmm1, imm=0xE4
7744        let bytes = encode(
7745            "vpshufd",
7746            vec![Register(Zmm0), Register(Zmm1), Immediate(0xE4)],
7747        );
7748        assert_eq!(bytes, vec![0x62, 0xF1, 0x7D, 0x48, 0x70, 0xC1, 0xE4]);
7749    }
7750}