zydis_rs/decoder/mod.rs
1//! # Decoder Module
2//!
3//! This module provides x86/x64 instruction decoding capabilities.
4//!
5//! ## 架构 / Architecture
6//!
7//! The decoder is organized into several sub-modules:
8//!
9//! - [`instruction`]: Core instruction data structures and attributes / 核心指令数据结构和属性
10//! - [`operand`]: Operand types (register, memory, immediate, etc.) / 操作数类型
11//! - [`prefix`]: Instruction prefix handling (REX, VEX, EVEX, etc.) / 指令前缀处理
12//! - [`modrm`]: ModR/M and SIB byte decoding / ModR/M 和 SIB 字节解码
13//! - [`opcode`]: Opcode table lookup and processing / 操作码表查找和处理
14//! - [`immediate`]: Immediate value extraction / 立即数提取
15//! - [`address`]: Memory address encoding/decoding / 内存地址编码/解码
16//!
17//! ## 特性 / Features
18//!
19//! - Zero-allocation decoding where possible / 尽可能零分配解码
20//! - Support for legacy, VEX, EVEX, and XOP encodings / 支持传统、VEX、EVEX 和 XOP 编码
21//! - Detailed operand information / 详细的操作数信息
22//! - Instruction attributes and categories / 指令属性和分类
23//! - Support for AVX-512 instructions / AVX-512 指令支持
24//!
25//! ## 示例 / Example
26//!
27//! ```rust,ignore
//! use zydis_rs::{Decoder, MachineMode};
//!
//! let decoder = Decoder::new(MachineMode::Long64, 64)?;
31//! let instruction = decoder.decode(&[0x48, 0x89, 0xE5])?;
32//! ```
33
34pub mod address;
35pub mod immediate;
36pub mod instruction;
37pub mod modrm;
38pub mod opcode;
39pub mod operand;
40pub mod prefix;
41
42// Re-export main types
43pub use instruction::{
44 BranchType, CPUFlag, ConditionCode, CpuFlags, DecodedInstruction, ExceptionClass,
45 MAX_OPERAND_COUNT, StackPointerInfo,
46};
47pub use modrm::{ModRm, Sib};
48pub use opcode::{MandatoryPrefix, OpcodeDesc, OpcodeEscape, OpcodeMap};
49pub use operand::{ImmData, MemData, Operand, OperandData, PtrData, RegData};
50pub use prefix::{DecodedPrefixes, EvexPrefix, LegacyPrefixSet, RexPrefix, VexPrefix, XopPrefix};
51
52use crate::data::instruction_def::{InstructionCategory, IsaSet};
53use crate::data::instructions_core;
54use crate::data::instructions_sse;
55use crate::error::{Error, Result};
56use crate::isa::{
57 InstructionAttributes, MachineMode, Mnemonic, OperandAccess, OperandSize, Register,
58};
59
/// Decoder mode flags.
///
/// Controls various decoding behaviors, similar to Zydis's `ZydisDecoderMode`.
/// The struct only stores the flags; every flag is `false` by default.
///
/// Two mode sets compare equal when all of their flags match, and the type
/// can be hashed, so a configuration can be compared in tests or used as a
/// map/set key.
///
/// # Modes
///
/// - **Minimal**: Only decode instruction length and basic attributes (faster)
/// - **AMD Branches**: Use AMD's branch semantics instead of Intel's
/// - **KNC**: Support Knights Corner instructions (deprecated)
/// - **MPX**: Support Memory Protection Extensions
/// - **CET**: Support Control-flow Enforcement Technology
/// - **LZCNT**: Special handling for LZCNT instruction
/// - **TZCNT**: Special handling for TZCNT instruction
/// - **XBEGIN**: Automatic transaction abort for XBEGIN
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct DecoderModes {
    /// Minimal decoding mode - only decode instruction length and basic attributes
    pub minimal: bool,
    /// AMD branch semantics - use AMD's branch behavior interpretation
    pub amd_branches: bool,
    /// KNC mode - Knights Corner instruction support (deprecated)
    pub knc: bool,
    /// MPX mode - Memory Protection Extensions support
    pub mpx: bool,
    /// CET mode - Control-flow Enforcement Technology support
    pub cet: bool,
    /// LZCNT mode - special handling for LZCNT (vs BSR)
    pub lzcnt: bool,
    /// TZCNT mode - special handling for TZCNT (vs BSF)
    pub tzcnt: bool,
    /// XBEGIN mode - automatic transaction abort behavior
    pub xbegin: bool,
}
93
94impl DecoderModes {
95 /// Create a new DecoderModes with all modes disabled
96 #[must_use]
97 pub const fn new() -> Self {
98 Self {
99 minimal: false,
100 amd_branches: false,
101 knc: false,
102 mpx: false,
103 cet: false,
104 lzcnt: false,
105 tzcnt: false,
106 xbegin: false,
107 }
108 }
109
110 /// Enable or disable a specific mode
111 #[must_use]
112 pub const fn with_mode(mut self, mode: DecoderMode, enabled: bool) -> Self {
113 match mode {
114 DecoderMode::Minimal => self.minimal = enabled,
115 DecoderMode::AmdBranches => self.amd_branches = enabled,
116 DecoderMode::Knc => self.knc = enabled,
117 DecoderMode::Mpx => self.mpx = enabled,
118 DecoderMode::Cet => self.cet = enabled,
119 DecoderMode::Lzcnt => self.lzcnt = enabled,
120 DecoderMode::Tzcnt => self.tzcnt = enabled,
121 DecoderMode::Xbegin => self.xbegin = enabled,
122 }
123 self
124 }
125
126 /// Check if a specific mode is enabled
127 #[must_use]
128 pub const fn is_enabled(&self, mode: DecoderMode) -> bool {
129 match mode {
130 DecoderMode::Minimal => self.minimal,
131 DecoderMode::AmdBranches => self.amd_branches,
132 DecoderMode::Knc => self.knc,
133 DecoderMode::Mpx => self.mpx,
134 DecoderMode::Cet => self.cet,
135 DecoderMode::Lzcnt => self.lzcnt,
136 DecoderMode::Tzcnt => self.tzcnt,
137 DecoderMode::Xbegin => self.xbegin,
138 }
139 }
140}
141
/// Selector for one individual flag of [`DecoderModes`].
///
/// Passed to `DecoderModes::with_mode` / `DecoderModes::is_enabled` (and the
/// matching `Decoder` methods) to name the flag being set or queried.
/// Implements `Hash` so that modes can be collected into sets or used as map
/// keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecoderMode {
    /// Minimal decoding mode - only decode instruction length and basic attributes
    Minimal,
    /// AMD branch semantics - use AMD's branch behavior interpretation
    AmdBranches,
    /// KNC mode - Knights Corner instruction support (deprecated)
    Knc,
    /// MPX mode - Memory Protection Extensions support
    Mpx,
    /// CET mode - Control-flow Enforcement Technology support
    Cet,
    /// LZCNT mode - special handling for LZCNT instruction
    Lzcnt,
    /// TZCNT mode - special handling for TZCNT instruction
    Tzcnt,
    /// XBEGIN mode - automatic transaction abort behavior
    Xbegin,
}
162
163/// 主解码器实例 / Main decoder instance
164///
165/// 解码器保存机器模式和栈宽度的配置,用于解码指令字节。
166///
167/// The decoder holds the configuration for machine mode and stack width,
168/// and is used to decode instruction bytes.
169///
170/// # 工作流程 / Workflow
171///
172/// 1. 解析前缀(Legacy, REX, VEX, EVEX, XOP)
173/// 2. 解析操作码
174/// 3. 解析 ModR/M 和 SIB 字节(如果存在)
175/// 4. 解析偏移量(如果存在)
176/// 5. 解析立即数(如果存在)
177/// 6. 构建操作数信息
178///
179/// # 示例 / Example
180///
181/// ```rust,ignore
182/// use zydis_rs::{Decoder, MachineMode, StackWidth};
183///
184/// // 创建 64 位模式解码器 / Create 64-bit mode decoder
185/// let decoder = Decoder::new(MachineMode::Long64, StackWidth::Width64)?;
186///
187/// // 解码指令 / Decode instruction
188/// let code = &[0x48, 0x89, 0xE5]; // mov rbp, rsp
189/// let instruction = decoder.decode(code)?;
190///
191/// println!("Mnemonic: {:?}", instruction.mnemonic);
192/// println!("Length: {}", instruction.length);
193/// println!("Operand count: {}", instruction.operand_count);
194/// # Ok::<(), zydis_rs::Error>(())
195/// ```
196///
197/// # 注意事项 / Notes
198///
199/// - 解码器是无状态的,可以安全地在多个线程之间共享
200/// - 单条指令的最大长度为 15 字节
201/// - 64 位模式下会自动处理 REX 前缀
202#[derive(Debug, Clone)]
203pub struct Decoder {
204 /// Machine mode (64-bit, 32-bit, 16-bit) / 机器模式
205 machine_mode: MachineMode,
206 /// Default stack width in bits / 默认栈宽度(位)
207 #[allow(dead_code)]
208 stack_width: u8,
209 /// Default operand size in bits / 默认操作数大小(位)
210 default_operand_size: u8,
211 /// Default address size in bits / 默认地址大小(位)
212 #[allow(dead_code)]
213 default_address_size: u8,
214 /// Whether this is 64-bit mode / 是否为 64 位模式
215 is_64_bit: bool,
216 /// Decoder mode flags / 解码器模式标志
217 modes: DecoderModes,
218}
219
220impl Decoder {
221 /// 创建指定机器模式的解码器
222 ///
223 /// Create a new decoder for the specified machine mode.
224 ///
225 /// # 参数 / Arguments
226 ///
227 /// * `machine_mode` - 机器模式 / The machine mode (Long64, Protected32, etc.)
228 /// * `stack_width` - 栈宽度(位)/ The stack width in bits (64 for 64-bit mode, 32 for 32-bit mode)
229 ///
230 /// # 返回 / Returns
231 ///
232 /// 返回解码器实例或错误
233 ///
234 /// # 错误 / Errors
235 ///
236 /// 如果机器模式无效,返回 `Error::InvalidMachineMode`
237 ///
238 /// # 示例 / Example
239 ///
240 /// ```rust,ignore
241 /// use zydis_rs::{Decoder, MachineMode, StackWidth};
242 ///
243 /// let decoder = Decoder::new(MachineMode::Long64, StackWidth::Width64)?;
244 /// # Ok::<(), zydis_rs::Error>(())
245 /// ```
246 pub fn new(machine_mode: MachineMode, stack_width: u8) -> Result<Self> {
247 let (is_64_bit, default_operand_size, default_address_size) = match machine_mode {
248 MachineMode::Long64 => (true, 32, 64),
249 MachineMode::Protected32 => (false, 32, 32),
250 MachineMode::Protected16 | MachineMode::Real16 => (false, 16, 16),
251 MachineMode::Unknown => return Err(Error::InvalidMachineMode),
252 };
253
254 Ok(Self {
255 machine_mode,
256 stack_width,
257 default_operand_size,
258 default_address_size,
259 is_64_bit,
260 modes: DecoderModes::new(),
261 })
262 }
263
264 /// 创建 64 位长模式解码器
265 ///
266 /// Create a decoder for 64-bit long mode.
267 ///
268 /// # 示例 / Example
269 ///
270 /// ```rust,ignore
271 /// use zydis_rs::Decoder;
272 ///
273 /// let decoder = Decoder::new_64bit();
274 /// ```
275 #[must_use]
276 pub fn new_64bit() -> Self {
277 Self {
278 machine_mode: MachineMode::Long64,
279 stack_width: 64,
280 default_operand_size: 32,
281 default_address_size: 64,
282 is_64_bit: true,
283 modes: DecoderModes::new(),
284 }
285 }
286
287 /// 创建 32 位保护模式解码器
288 ///
289 /// Create a decoder for 32-bit protected mode.
290 ///
291 /// # 示例 / Example
292 ///
293 /// ```rust,ignore
294 /// use zydis_rs::Decoder;
295 ///
296 /// let decoder = Decoder::new_32bit();
297 /// ```
298 #[must_use]
299 pub fn new_32bit() -> Self {
300 Self {
301 machine_mode: MachineMode::Protected32,
302 stack_width: 32,
303 default_operand_size: 32,
304 default_address_size: 32,
305 is_64_bit: false,
306 modes: DecoderModes::new(),
307 }
308 }
309
310 /// 设置解码器模式
311 ///
312 /// Set a specific decoder mode.
313 ///
314 /// # 参数 / Arguments
315 ///
316 /// * `mode` - 要设置的模式 / The mode to set
317 /// * `enabled` - 是否启用该模式 / Whether to enable the mode
318 ///
319 /// # 示例 / Example
320 ///
321 /// ```rust,ignore
322 /// use zydis_rs::{Decoder, decoder::DecoderMode};
323 ///
324 /// let mut decoder = Decoder::new_64bit();
325 /// decoder.set_mode(DecoderMode::Minimal, true); // 启用最小模式
326 /// decoder.set_mode(DecoderMode::AmdBranches, false); // 禁用 AMD 分支模式
327 /// ```
328 pub fn set_mode(&mut self, mode: DecoderMode, enabled: bool) {
329 self.modes = self.modes.with_mode(mode, enabled);
330 }
331
332 /// 获取当前解码器模式
333 ///
334 /// Get the current decoder modes.
335 ///
336 /// # 返回 / Returns
337 ///
338 /// 返回当前解码器模式配置
339 #[must_use]
340 pub const fn modes(&self) -> &DecoderModes {
341 &self.modes
342 }
343
344 /// 检查特定模式是否启用
345 ///
346 /// Check if a specific mode is enabled.
347 ///
348 /// # 参数 / Arguments
349 ///
350 /// * `mode` - 要检查的模式 / The mode to check
351 ///
352 /// # 返回 / Returns
353 ///
354 /// 如果该模式启用则返回 true
355 #[must_use]
356 pub const fn is_mode_enabled(&self, mode: DecoderMode) -> bool {
357 self.modes.is_enabled(mode)
358 }
359
360 /// 获取机器模式 / Get the machine mode
361 ///
362 /// # 返回 / Returns
363 ///
364 /// 解码器配置的机器模式
365 #[must_use]
366 pub const fn machine_mode(&self) -> MachineMode {
367 self.machine_mode
368 }
369
370 /// 获取栈宽度(位)/ Get the stack width
371 #[must_use]
372 pub const fn stack_width(&self) -> u8 {
373 self.stack_width
374 }
375
376 /// 检查是否为 64 位模式 / Check if this is 64-bit mode
377 #[must_use]
378 pub const fn is_64_bit(&self) -> bool {
379 self.is_64_bit
380 }
381
382 /// 从字节解码单条指令
383 ///
384 /// Decode a single instruction from bytes.
385 ///
386 /// # 参数 / Arguments
387 ///
388 /// * `bytes` - 要解码的指令字节 / The instruction bytes to decode
389 ///
390 /// # 返回 / Returns
391 ///
392 /// 成功时返回 `Ok(DecodedInstruction)`,失败时返回错误
393 ///
394 /// # 错误 / Errors
395 ///
396 /// - `Error::InsufficientBytes` - 字节不足,无法构成有效指令
397 /// - `Error::InvalidOpcode` - 无效的操作码
398 /// - `Error::InstructionTooLong` - 指令长度超过 15 字节
399 ///
400 /// # 示例 / Example
401 ///
402 /// ```rust,ignore
403 /// use zydis_rs::Decoder;
404 ///
405 /// let decoder = Decoder::new_64bit();
406 /// let instruction = decoder.decode(&[0x48, 0x89, 0xE5])?; // MOV rbp, rsp
407 /// # Ok::<(), zydis_rs::Error>(())
408 /// ```
409 pub fn decode(&self, bytes: &[u8]) -> Result<DecodedInstruction> {
410 self.decode_with_address(bytes, 0)
411 }
412
413 /// 解码单条指令并指定地址
414 ///
415 /// Decode a single instruction with a specific address.
416 ///
417 /// # 参数 / Arguments
418 ///
419 /// * `bytes` - 要解码的指令字节 / The instruction bytes to decode
420 /// * `address` - 指令地址(用于 RIP 相对寻址)/ The address of the instruction (for RIP-relative addressing)
421 ///
422 /// # 返回 / Returns
423 ///
424 /// 成功时返回解码后的指令,失败时返回错误
425 ///
426 /// # 注意 / Notes
427 ///
428 /// 在 64 位模式下,某些指令(如 `[rip + disp32]`)需要知道指令地址才能正确解析。
429 /// 使用此方法可以确保 RIP 相对寻址被正确处理。
430 ///
431 /// # 示例 / Example
432 ///
433 /// ```rust,ignore
434 /// use zydis_rs::Decoder;
435 ///
436 /// let decoder = Decoder::new_64bit();
437 /// // RIP 相对寻址需要指定地址 / RIP-relative addressing requires address
438 /// let instruction = decoder.decode_with_address(&[0x48, 0x8B, 0x05, 0x00, 0x00, 0x00, 0x00], 0x1000)?;
439 /// # Ok::<(), zydis_rs::Error>(())
440 /// ```
441 pub fn decode_with_address(&self, bytes: &[u8], address: u64) -> Result<DecodedInstruction> {
442 if bytes.is_empty() {
443 return Err(Error::InsufficientBytes);
444 }
445
446 let mut instr = DecodedInstruction::new();
447 instr.machine_mode = self.machine_mode;
448 instr.address = address;
449
450 let mut offset = 0usize;
451
452 // Step 1: Parse prefixes
453 offset = self.parse_prefixes(bytes, offset, &mut instr)?;
454
455 // Step 2: Parse opcode
456 offset = self.parse_opcode(bytes, offset, &mut instr)?;
457
458 // Step 3: Parse ModRM (if present)
459 offset = self.parse_modrm(bytes, offset, &mut instr)?;
460
461 // Step 4: Parse SIB (if present)
462 offset = self.parse_sib(bytes, offset, &mut instr)?;
463
464 // Step 5: Parse displacement (if present)
465 offset = self.parse_displacement(bytes, offset, &mut instr)?;
466
467 // Step 6: Parse immediate (if present)
468 offset = self.parse_immediate(bytes, offset, &mut instr)?;
469
470 // Calculate instruction length
471 instr.length = offset as u8;
472
473 // Copy raw bytes
474 if offset <= 15 {
475 instr.bytes[..offset].copy_from_slice(&bytes[..offset]);
476 } else {
477 return Err(Error::InstructionTooLong);
478 }
479
480 // Build operands based on instruction type
481 self.build_operands(&mut instr)?;
482
483 // Calculate effective operand and address sizes
484 self.calculate_effective_sizes(&mut instr);
485
486 Ok(instr)
487 }
488
489 /// Parse instruction prefixes
490 fn parse_prefixes(
491 &self,
492 bytes: &[u8],
493 mut offset: usize,
494 instr: &mut DecodedInstruction,
495 ) -> Result<usize> {
496 let prefixes = &mut instr.prefixes;
497
498 // Parse legacy prefixes
499 while offset < bytes.len() {
500 let byte = bytes[offset];
501
502 if prefix::is_legacy_prefix(byte) {
503 if let Some((kind, seg)) = prefix::parse_legacy_prefix(byte) {
504 match kind {
505 prefix::LegacyPrefixKind::Lock => prefixes.legacy.has_lock = true,
506 prefix::LegacyPrefixKind::Rep => prefixes.legacy.has_rep = true,
507 prefix::LegacyPrefixKind::Repne => prefixes.legacy.has_repne = true,
508 prefix::LegacyPrefixKind::OperandSize => {
509 prefixes.legacy.has_operand_size = true
510 }
511 prefix::LegacyPrefixKind::AddressSize => {
512 prefixes.legacy.has_address_size = true
513 }
514 prefix::LegacyPrefixKind::Segment => prefixes.legacy.segment = seg,
515 }
516 prefixes.prefix_count += 1;
517 offset += 1;
518 } else {
519 break;
520 }
521 } else if self.is_64_bit && RexPrefix::is_rex(byte) {
522 // REX prefix (64-bit mode only)
523 prefixes.rex = RexPrefix::from_byte(byte);
524 prefixes.prefix_count += 1;
525 offset += 1;
526 break; // REX must be last prefix before opcode
527 } else if byte == 0xC4 || byte == 0xC5 {
528 // VEX prefix
529 if offset + 1 >= bytes.len() {
530 return Err(Error::InsufficientBytes);
531 }
532
533 if byte == 0xC5 {
534 // 2-byte VEX
535 prefixes.vex = VexPrefix::from_2byte(bytes[offset + 1]);
536 prefixes.prefix_count += 2;
537 offset += 2;
538 } else {
539 // 3-byte VEX
540 if offset + 2 >= bytes.len() {
541 return Err(Error::InsufficientBytes);
542 }
543 prefixes.vex = VexPrefix::from_3byte(bytes[offset + 1], bytes[offset + 2]);
544 prefixes.prefix_count += 3;
545 offset += 3;
546 }
547 break; // VEX replaces REX and mandatory prefixes
548 } else if byte == 0x62 {
549 // EVEX prefix (AVX-512)
550 // Format: 0x62 P0 P1 P2 (4 bytes total)
551 if offset + 3 >= bytes.len() {
552 return Err(Error::InsufficientBytes);
553 }
554 prefixes.evex = EvexPrefix::from_bytes(
555 bytes[offset + 1], // P0
556 bytes[offset + 2], // P1
557 bytes[offset + 3], // P2 (contains z, L'L, b, V', aaa)
558 );
559 prefixes.prefix_count += 4; // 62 + P0 + P1 + P2 = 4 bytes
560 offset += 4;
561 break;
562 } else if byte == 0x8F {
563 // XOP prefix (AMD)
564 if offset + 2 >= bytes.len() {
565 return Err(Error::InsufficientBytes);
566 }
567 prefixes.xop = XopPrefix::from_bytes(bytes[offset + 1], bytes[offset + 2]);
568 prefixes.prefix_count += 3;
569 offset += 3;
570 break;
571 } else {
572 break;
573 }
574 }
575
576 Ok(offset)
577 }
578
579 /// Parse opcode bytes
580 fn parse_opcode(
581 &self,
582 bytes: &[u8],
583 mut offset: usize,
584 instr: &mut DecodedInstruction,
585 ) -> Result<usize> {
586 if offset >= bytes.len() {
587 return Err(Error::InsufficientBytes);
588 }
589
590 // Handle VEX-encoded instructions
591 if instr.prefixes.has_vex() {
592 return self.parse_vex_opcode(bytes, offset, instr);
593 }
594
595 // Handle EVEX-encoded instructions (AVX-512)
596 if instr.prefixes.has_evex() {
597 return self.parse_evex_opcode(bytes, offset, instr);
598 }
599
600 let first_byte = bytes[offset];
601 instr.opcode = first_byte;
602 offset += 1;
603
604 // Check for two-byte escape (0x0F)
605 if first_byte == 0x0F {
606 if offset >= bytes.len() {
607 return Err(Error::InsufficientBytes);
608 }
609
610 let second_byte = bytes[offset];
611 offset += 1;
612
613 // Check for 3DNow! opcode escape (0x0F 0x0F)
614 // 3DNow! format: 0F 0F ModRM imm8 - the actual opcode is in the trailing imm8
615 if second_byte == 0x0F {
616 instr.opcode_map = OpcodeMap::Map0F;
617 instr.opcode = second_byte;
618
619 // Parse ModRM byte
620 if offset >= bytes.len() {
621 return Err(Error::InsufficientBytes);
622 }
623 instr.modrm = Some(bytes[offset]);
624 let modrm = ModRm::from_byte(bytes[offset]);
625 offset += 1;
626
627 // Parse SIB if needed
628 if modrm.has_sib() {
629 if offset >= bytes.len() {
630 return Err(Error::InsufficientBytes);
631 }
632 instr.sib = Some(bytes[offset]);
633 offset += 1;
634 }
635
636 // Parse displacement if needed (based on mod field)
637 let disp_size = if modrm.mod_field == 1 {
638 1 // disp8
639 } else if modrm.mod_field == 2
640 || (modrm.mod_field == 0
641 && (modrm.rm == 5
642 || (modrm.has_sib() && instr.sib.is_some_and(|s| (s & 0x07) == 5))))
643 {
644 4 // disp32, RIP-relative, or SIB with no base
645 } else {
646 0 // No displacement
647 };
648
649 if disp_size > 0 {
650 if offset + disp_size > bytes.len() {
651 return Err(Error::InsufficientBytes);
652 }
653
654 instr.displacement = match disp_size {
655 1 => i64::from(bytes[offset] as i8),
656 4 => i64::from(i32::from_le_bytes([
657 bytes[offset],
658 bytes[offset + 1],
659 bytes[offset + 2],
660 bytes[offset + 3],
661 ])),
662 _ => 0,
663 };
664 instr.disp_size = disp_size as u8;
665 offset += disp_size;
666 }
667
668 // Read the trailing imm8 which contains the actual 3DNow! opcode
669 if offset >= bytes.len() {
670 return Err(Error::InsufficientBytes);
671 }
672 let _opcode_3dnow = bytes[offset];
673 instr.immediate = u64::from(bytes[offset]);
674 instr.imm_size = 1;
675 offset += 1;
676
677 // Set instruction as a generic 3DNow! instruction
678 // The actual mnemonic would be determined by the opcode in imm8
679 instr.mnemonic = Mnemonic::Invalid; // Mark as needing 3DNow! table lookup
680 instr.category = InstructionCategory::SIMD;
681 instr.isa_set = IsaSet::Amd3dnow;
682 instr.length = offset as u8;
683
684 return Ok(offset);
685 }
686
687 // Check for three-byte escapes (0x0F 0x38 or 0x0F 0x3A)
688 if second_byte == 0x38 {
689 // 0F 38 xx three-byte opcode
690 instr.opcode_map = OpcodeMap::Map0F38;
691 if offset >= bytes.len() {
692 return Err(Error::InsufficientBytes);
693 }
694 instr.opcode = bytes[offset];
695 offset += 1;
696
697 // Look up in 0F38 opcode map
698 let mandatory_prefix = Self::get_mandatory_prefix(instr);
699 if let Some(def) = instructions_core::lookup_0f38(instr.opcode, mandatory_prefix) {
700 Self::apply_extended_instruction_def(instr, def);
701 } else {
702 instr.mnemonic = Mnemonic::Invalid;
703 instr.category = InstructionCategory::Invalid;
704 }
705 return Ok(offset);
706 }
707
708 if second_byte == 0x3A {
709 // 0F 3A xx three-byte opcode
710 instr.opcode_map = OpcodeMap::Map0F3A;
711 if offset >= bytes.len() {
712 return Err(Error::InsufficientBytes);
713 }
714 instr.opcode = bytes[offset];
715 offset += 1;
716
717 // Look up in 0F3A opcode map
718 let mandatory_prefix = Self::get_mandatory_prefix(instr);
719 if let Some(def) = instructions_core::lookup_0f3a(instr.opcode, mandatory_prefix) {
720 Self::apply_extended_instruction_def(instr, def);
721 } else {
722 instr.mnemonic = Mnemonic::Invalid;
723 instr.category = InstructionCategory::Invalid;
724 }
725 return Ok(offset);
726 }
727
728 // Two-byte opcode (0x0F map)
729 instr.opcode_map = OpcodeMap::Map0F;
730 instr.opcode = second_byte;
731
732 // Try SSE instructions first (most common case for 0F opcodes)
733 let has_66 = instr.prefixes.has_operand_size();
734 let has_f2 = instr.prefixes.has_repne();
735 let has_f3 = instr.prefixes.has_rep();
736
737 // Try to match SSE instruction
738 if let Some(sse_def) =
739 instructions_sse::lookup_sse_instruction(second_byte, has_66, has_f2, has_f3)
740 {
741 instr.mnemonic = sse_def.def.mnemonic;
742 instr.category = sse_def.def.category;
743 instr.isa_set = sse_def.def.isa_set;
744 instr.attributes = sse_def.def.attributes;
745 instr.operand_count = sse_def.def.operand_count;
746 return Ok(offset);
747 }
748
749 // Special handling for 0F 01 table - uses ModRM to determine instruction
750 // ModRM byte format: mod(2) reg(3) rm(3)
751 // These instructions are: SGDT, SIDT, LGDT, LIDT, SMSW, LMSW, INVLPG, SWAPGS, RDTSCP, VT-x, SVM, etc.
752 if second_byte == 0x01 {
753 if offset >= bytes.len() {
754 return Err(Error::InsufficientBytes);
755 }
756 let modrm = ModRm::from_byte(bytes[offset]);
757 instr.modrm = Some(modrm.raw);
758 instr.attributes |= InstructionAttributes::HAS_MODRM;
759 offset += 1;
760
761 // Determine instruction based on full ModRM byte
762 // The instruction is determined by both reg field and rm field when mod=11
763 //
764 // Key encodings (mod=11):
765 // 0F 01 C0 (C0=11-000-000): ENCLV - reg=0, rm=0
766 // 0F 01 C1 (C1=11-000-001): VMCALL - reg=0, rm=1
767 // 0F 01 C2 (C2=11-000-010): VMLAUNCH - reg=0, rm=2
768 // 0F 01 C3 (C3=11-000-011): VMRESUME - reg=0, rm=3
769 // 0F 01 C4 (C4=11-000-100): VMXOFF - reg=0, rm=4
770 // 0F 01 C5 (C5=11-000-101): - reg=0, rm=5
771 // 0F 01 C6 (C6=11-000-110): - reg=0, rm=6
772 // 0F 01 C7 (C7=11-000-111): VMCALL/ENCLV - reg=0, rm=7 (actually SWAPGS in 64-bit?)
773 // 0F 01 D0 (D0=11-010-000): XGETBV - reg=2, rm=0
774 // 0F 01 D1 (D1=11-010-001): XSETBV - reg=2, rm=1
775 // 0F 01 D4 (D4=11-010-100): CLAC - reg=2, rm=4
776 // 0F 01 D5 (D5=11-010-101): STAC - reg=2, rm=5
777 // 0F 01 D7 (D7=11-010-111): ENCLU - reg=2, rm=7
778 // 0F 01 D8 (D8=11-011-000): VMRUN - reg=3, rm=0 (AMD SVM)
779 // 0F 01 D9 (D9=11-011-001): VMMCALL - reg=3, rm=1 (AMD SVM)
780 // 0F 01 DA (DA=11-011-010): VMLOAD - reg=3, rm=2 (AMD SVM)
781 // 0F 01 DB (DB=11-011-011): VMSAVE - reg=3, rm=3 (AMD SVM)
782 // 0F 01 DC (DC=11-011-100): STGI - reg=3, rm=4 (AMD SVM)
783 // 0F 01 DD (DD=11-011-101): CLGI - reg=3, rm=5 (AMD SVM)
784 // 0F 01 CF (CF=11-000-111): ENCLS - reg=0, rm=7 (SGX)
785 // 0F 01 FA (FA=11-111-010): SERIALIZE - reg=7, rm=2
786 // 0F 01 F8 (F8=11-111-000): SWAPGS/MONITOR - reg=7, rm=0
787 // 0F 01 F9 (F9=11-111-001): RDTSCP/MWAIT - reg=7, rm=1
788 // 0F 01 F2 (F2=11-111-010): CLAC... no wait that's SERIALIZE at FA
789
790 instr.mnemonic = match modrm.reg {
791 0 => {
792 // reg=0: SGDT (mod!=11) or VT-x/SGX instructions (mod=11)
793 if modrm.mod_field == 3 {
794 match modrm.rm {
795 0 => Mnemonic::ENCLV, // 0F 01 C0
796 1 => Mnemonic::VMCALL, // 0F 01 C1
797 2 => Mnemonic::VMLAUNCH, // 0F 01 C2
798 3 => Mnemonic::VMRESUME, // 0F 01 C3
799 4 => Mnemonic::VMXOFF, // 0F 01 C4
800 7 => Mnemonic::ENCLS, // 0F 01 CF (SGX Enclave System)
801 _ => Mnemonic::Invalid,
802 }
803 } else {
804 Mnemonic::SGDT
805 }
806 }
807 1 => {
808 // reg=1: SIDT (mod!=11)
809 if modrm.mod_field == 3 {
810 Mnemonic::Invalid // No valid mod=11 instructions with reg=1
811 } else {
812 Mnemonic::SIDT
813 }
814 }
815 2 => {
816 // reg=2: LGDT (mod!=11) or XGETBV/XSETBV/ENCLU (mod=11)
817 if modrm.mod_field == 3 {
818 match modrm.rm {
819 0 => Mnemonic::XGETBV, // 0F 01 D0
820 1 => Mnemonic::XSETBV, // 0F 01 D1
821 4 => Mnemonic::CLAC, // 0F 01 D4
822 5 => Mnemonic::STAC, // 0F 01 D5
823 7 => Mnemonic::ENCLU, // 0F 01 D7 (SGX Enclave User)
824 _ => Mnemonic::Invalid,
825 }
826 } else {
827 Mnemonic::LGDT
828 }
829 }
830 3 => {
831 // reg=3: LIDT (mod!=11) or AMD SVM instructions (mod=11)
832 if modrm.mod_field == 3 {
833 match modrm.rm {
834 0 => Mnemonic::VMRUN, // 0F 01 D8 (AMD SVM)
835 1 => Mnemonic::VMMCALL, // 0F 01 D9 (AMD SVM)
836 2 => Mnemonic::VMLOAD, // 0F 01 DA (AMD SVM)
837 3 => Mnemonic::VMSAVE, // 0F 01 DB (AMD SVM)
838 4 => Mnemonic::STGI, // 0F 01 DC (AMD SVM)
839 5 => Mnemonic::CLGI, // 0F 01 DD (AMD SVM)
840 _ => Mnemonic::Invalid,
841 }
842 } else {
843 Mnemonic::LIDT
844 }
845 }
846 4 => {
847 // reg=4: SMSW (Store Machine Status Word)
848 Mnemonic::SMSW
849 }
850 5 => {
851 // reg=5: LMSW (mod!=11) or VMREAD/VMWRITE (mod=11)
852 if modrm.mod_field == 3 {
853 match modrm.rm {
854 0 => Mnemonic::VMREAD, // 0F 01 F8? No, let me check...
855 1 => Mnemonic::VMWRITE,
856 _ => Mnemonic::Invalid,
857 }
858 } else {
859 Mnemonic::LMSW
860 }
861 }
862 6 => {
863 // reg=6: INVLPG (mod!=11)
864 if modrm.mod_field == 3 {
865 Mnemonic::Invalid
866 } else {
867 Mnemonic::INVLPG
868 }
869 }
870 7 => {
871 // reg=7: INVLPG (mod!=11) or SWAPGS/RDTSCP/SERIALIZE (mod=11)
872 if modrm.mod_field == 3 {
873 match modrm.rm {
874 0 => {
875 // SWAPGS (64-bit) / MONITOR (32-bit)
876 if self.is_64_bit {
877 Mnemonic::SWAPGS
878 } else {
879 Mnemonic::MONITOR
880 }
881 }
882 1 => {
883 // RDTSCP (64-bit) / MWAIT (32-bit)
884 if self.is_64_bit {
885 Mnemonic::RDTSCP
886 } else {
887 Mnemonic::MWAIT
888 }
889 }
890 2 => Mnemonic::SERIALIZE, // 0F 01 FA
891 3 => Mnemonic::CLAC, // Some docs say this
892 4 => Mnemonic::STAC,
893 _ => Mnemonic::Invalid,
894 }
895 } else {
896 Mnemonic::INVLPG
897 }
898 }
899 _ => Mnemonic::Invalid,
900 };
901
902 instr.category = InstructionCategory::System;
903 instr.isa_set = IsaSet::I86;
904 instr.operand_count = if matches!(instr.mnemonic,
905 Mnemonic::RDTSCP | Mnemonic::SWAPGS | Mnemonic::CLAC | Mnemonic::STAC |
906 Mnemonic::VMCALL | Mnemonic::VMLAUNCH | Mnemonic::VMRESUME | Mnemonic::VMXOFF |
907 Mnemonic::CLGI | Mnemonic::STGI | Mnemonic::VMRUN | Mnemonic::VMMCALL |
908 Mnemonic::VMLOAD | Mnemonic::VMSAVE | Mnemonic::ENCLS | Mnemonic::ENCLU |
909 Mnemonic::ENCLV | Mnemonic::SERIALIZE | Mnemonic::XGETBV | Mnemonic::XSETBV) {
910 0
911 } else {
912 1
913 };
914 return Ok(offset);
915 }
916
917 // Special handling for 0F 31 (RDTSC)
918 if second_byte == 0x31 {
919 instr.mnemonic = Mnemonic::RDTSC;
920 instr.category = InstructionCategory::System;
921 instr.isa_set = IsaSet::I86;
922 instr.operand_count = 0;
923 return Ok(offset);
924 }
925
926 // Special handling for 0F 33 (RDPMC)
927 if second_byte == 0x33 {
928 instr.mnemonic = Mnemonic::RDPMC;
929 instr.category = InstructionCategory::System;
930 instr.isa_set = IsaSet::I86;
931 instr.operand_count = 0;
932 return Ok(offset);
933 }
934
935 // Special handling for 0F AE group (FXSAVE, FXRSTOR, LDMXCSR, STMXCSR, CLFLUSH, etc.)
936 // These instructions use ModRM.reg to determine the actual instruction
937 // With 66 prefix: CLFLUSHOPT (reg=7), CLWB (reg=6)
938 if second_byte == 0xAE {
939 if offset >= bytes.len() {
940 return Err(Error::InsufficientBytes);
941 }
942 let modrm = ModRm::from_byte(bytes[offset]);
943 instr.modrm = Some(modrm.raw);
944 instr.attributes |= InstructionAttributes::HAS_MODRM;
945 offset += 1;
946
947 let has_66 = instr.prefixes.has_operand_size();
948
949 instr.mnemonic = match modrm.reg {
950 0 => Mnemonic::FXSAVE, // FXSAVE m512
951 1 => Mnemonic::FXRSTOR, // FXRSTOR m512
952 2 => Mnemonic::LDMXCSR, // LDMXCSR m32
953 3 => Mnemonic::STMXCSR, // STMXCSR m32
954 4 => Mnemonic::XSAVE, // XSAVE mem
955 5 => Mnemonic::XRSTOR, // XRSTOR mem
956 6 => {
957 // XSAVEOPT (no prefix) / CLWB (66 prefix)
958 if has_66 {
959 Mnemonic::CLWB
960 } else {
961 Mnemonic::XSAVEOPT
962 }
963 }
964 7 => {
965 // CLFLUSH (no prefix, mod != 11) / CLFLUSHOPT (66 prefix, mod != 11)
966 if has_66 {
967 Mnemonic::CLFLUSHOPT
968 } else {
969 Mnemonic::CLFLUSH
970 }
971 }
972 _ => Mnemonic::Invalid,
973 };
974 instr.category = InstructionCategory::System;
975 instr.isa_set = IsaSet::I86;
976 instr.operand_count = 1;
977 return Ok(offset);
978 }
979
980 // Special handling for 0F 78 (VMREAD) - Intel VT-x
981 // VMREAD r64, r64 - Read field from VMCS
982 if second_byte == 0x78 {
983 if offset >= bytes.len() {
984 return Err(Error::InsufficientBytes);
985 }
986 let modrm = ModRm::from_byte(bytes[offset]);
987 instr.modrm = Some(modrm.raw);
988 instr.attributes |= InstructionAttributes::HAS_MODRM;
989 offset += 1;
990
991 instr.mnemonic = Mnemonic::VMREAD;
992 instr.category = InstructionCategory::System;
993 instr.isa_set = IsaSet::I86;
994 instr.operand_count = 0; // Uses implicit registers
995 return Ok(offset);
996 }
997
998 // Special handling for 0F 79 (VMWRITE) - Intel VT-x
999 // VMWRITE r64, r64 - Write field to VMCS
1000 if second_byte == 0x79 {
1001 if offset >= bytes.len() {
1002 return Err(Error::InsufficientBytes);
1003 }
1004 let modrm = ModRm::from_byte(bytes[offset]);
1005 instr.modrm = Some(modrm.raw);
1006 instr.attributes |= InstructionAttributes::HAS_MODRM;
1007 offset += 1;
1008
1009 instr.mnemonic = Mnemonic::VMWRITE;
1010 instr.category = InstructionCategory::System;
1011 instr.isa_set = IsaSet::I86;
1012 instr.operand_count = 0; // Uses implicit registers
1013 return Ok(offset);
1014 }
1015
1016 // Special handling for 0F 0D group (PREFETCH instructions)
1017 // 0F 0D /0 = PREFETCH (3DNow!)
1018 // 0F 0D /1 = PREFETCHW
1019 // 0F 0D /2 = PREFETCHWT1
1020 if second_byte == 0x0D {
1021 if offset >= bytes.len() {
1022 return Err(Error::InsufficientBytes);
1023 }
1024 let modrm = ModRm::from_byte(bytes[offset]);
1025 instr.modrm = Some(modrm.raw);
1026 instr.attributes |= InstructionAttributes::HAS_MODRM;
1027 offset += 1;
1028
1029 instr.mnemonic = match modrm.reg {
1030 0 => Mnemonic::PREFETCH, // PREFETCH (3DNow! style)
1031 1 => Mnemonic::PREFETCHW, // PREFETCHW
1032 2 => Mnemonic::PREFETCHWT1, // PREFETCHWT1
1033 _ => Mnemonic::PREFETCH, // Default to PREFETCH
1034 };
1035 instr.category = InstructionCategory::System;
1036 instr.isa_set = IsaSet::I86;
1037 instr.operand_count = 1;
1038 return Ok(offset);
1039 }
1040
1041 // Special handling for 0F 1C group (CLDEMOTE and other hint nop)
1042 // 0F 1C /0 = CLDEMOTE
1043 if second_byte == 0x1C {
1044 if offset >= bytes.len() {
1045 return Err(Error::InsufficientBytes);
1046 }
1047 let modrm = ModRm::from_byte(bytes[offset]);
1048 instr.modrm = Some(modrm.raw);
1049 instr.attributes |= InstructionAttributes::HAS_MODRM;
1050 offset += 1;
1051
1052 instr.mnemonic = match modrm.reg {
1053 0 => Mnemonic::CLDEMOTE, // CLDEMOTE m8
1054 _ => Mnemonic::NOP, // Hint NOP for other reg values
1055 };
1056 instr.category = InstructionCategory::System;
1057 instr.isa_set = IsaSet::I86;
1058 instr.operand_count = 1;
1059 return Ok(offset);
1060 }
1061
1062 // Special handling for 0F C7 group (CMPXCHG8B, CMPXCHG16B, RDPID, etc.)
1063 // These instructions use ModRM.reg to determine the actual instruction
1064 if second_byte == 0xC7 {
1065 if offset >= bytes.len() {
1066 return Err(Error::InsufficientBytes);
1067 }
1068 let modrm = ModRm::from_byte(bytes[offset]);
1069 instr.modrm = Some(modrm.raw);
1070 instr.attributes |= InstructionAttributes::HAS_MODRM;
1071 offset += 1;
1072
1073 // Check for F3 prefix (RDPID)
1074 if instr.prefixes.has_rep() {
1075 // F3 0F C7 /7 - RDPID
1076 if modrm.reg == 7 && modrm.mod_field == 3 {
1077 instr.mnemonic = Mnemonic::RDPID;
1078 instr.category = InstructionCategory::System;
1079 instr.isa_set = IsaSet::I86;
1080 instr.operand_count = 1;
1081 return Ok(offset);
1082 }
1083 }
1084
1085 instr.mnemonic = match modrm.reg {
1086 1 => {
1087 // CMPXCHG8B (32-bit) / CMPXCHG16B (64-bit with REX.W)
1088 if self.is_64_bit && instr.prefixes.effective_w() {
1089 Mnemonic::CMPXCHG16B
1090 } else {
1091 Mnemonic::CMPXCHG8B
1092 }
1093 }
1094 _ => Mnemonic::Invalid,
1095 };
1096 instr.category = InstructionCategory::DataTransfer;
1097 instr.isa_set = IsaSet::I86;
1098 instr.operand_count = 1;
1099 return Ok(offset);
1100 }
1101
1102 // Try extended instruction definitions for 0F map
1103 let mandatory_prefix = Self::get_mandatory_prefix(instr);
1104 if let Some(def) = instructions_core::lookup_0f(second_byte, mandatory_prefix) {
1105 Self::apply_extended_instruction_def(instr, def);
1106 return Ok(offset);
1107 }
1108
1109 // Two-byte opcode not recognized
1110 instr.mnemonic = Mnemonic::Invalid;
1111 instr.category = InstructionCategory::Invalid;
1112 return Ok(offset);
1113 }
1114
1115 // Single-byte opcode - look up in core instruction table
1116 instr.opcode_map = OpcodeMap::Default;
1117
1118 // Special handling for Group 1 opcodes (0x80-0x83)
1119 // These opcodes use ModRM.reg to determine the actual instruction:
1120 // 0=ADD, 1=OR, 2=ADC, 3=SBB, 4=AND, 5=SUB, 6=XOR, 7=CMP
1121 if (0x80..=0x83).contains(&first_byte) {
1122 // Mark as having ModRM and immediate
1123 instr.attributes |= InstructionAttributes::HAS_MODRM;
1124
1125 // Determine immediate size based on opcode
1126 match first_byte {
1127 0x80 | 0x82 => {
1128 // 8-bit immediate
1129 instr.attributes |= InstructionAttributes::HAS_IMM8;
1130 }
1131 0x81 => {
1132 // 16/32-bit immediate (64-bit with REX.W)
1133 instr.attributes |= InstructionAttributes::HAS_IMM;
1134 }
1135 0x83 => {
1136 // 8-bit immediate sign-extended to 16/32/64 bits
1137 instr.attributes |= InstructionAttributes::HAS_IMM8;
1138 }
1139 _ => {}
1140 }
1141
1142 // Need to read ModRM to determine the actual instruction
1143 if offset >= bytes.len() {
1144 return Err(Error::InsufficientBytes);
1145 }
1146 let modrm = ModRm::from_byte(bytes[offset]);
1147 instr.modrm = Some(modrm.raw);
1148 offset += 1; // Advance past ModRM byte
1149
1150 // Set mnemonic based on ModRM.reg field
1151 instr.mnemonic = match modrm.reg {
1152 0 => Mnemonic::ADD,
1153 1 => Mnemonic::OR,
1154 2 => Mnemonic::ADC,
1155 3 => Mnemonic::SBB,
1156 4 => Mnemonic::AND,
1157 5 => Mnemonic::SUB,
1158 6 => Mnemonic::XOR,
1159 7 => Mnemonic::CMP,
1160 _ => Mnemonic::Invalid,
1161 };
1162 instr.category = InstructionCategory::Arithmetic;
1163 instr.isa_set = IsaSet::I86;
1164 instr.operand_count = 2;
1165
1166 return Ok(offset);
1167 }
1168
1169 // Special handling for Group 2 opcodes (Shift/Rotate: C0, C1, D0, D1, D2, D3)
1170 // These opcodes use ModRM.reg to determine the actual instruction:
1171 // 0=ROL, 1=ROR, 2=RCL, 3=RCR, 4=SHL/SAL, 5=SHR, 6=reserved, 7=SAR
1172 if matches!(first_byte, 0xC0 | 0xC1 | 0xD0 | 0xD1 | 0xD2 | 0xD3) {
1173 // Mark as having ModRM
1174 instr.attributes |= InstructionAttributes::HAS_MODRM;
1175
1176 // Determine operand size and immediate based on opcode
1177 match first_byte {
1178 0xC0 => {
1179 // Shift/Rotate r/m8, imm8
1180 instr.attributes |= InstructionAttributes::HAS_IMM8;
1181 }
1182 0xC1 => {
1183 // Shift/Rotate r/m16/32/64, imm8
1184 instr.attributes |= InstructionAttributes::HAS_IMM8;
1185 }
1186 0xD0 => {
1187 // Shift/Rotate r/m8, 1 (no immediate, count is implicit 1)
1188 }
1189 0xD1 => {
1190 // Shift/Rotate r/m16/32/64, 1 (no immediate, count is implicit 1)
1191 }
1192 0xD2 => {
1193 // Shift/Rotate r/m8, CL (count in CL register)
1194 }
1195 0xD3 => {
1196 // Shift/Rotate r/m16/32/64, CL (count in CL register)
1197 }
1198 _ => {}
1199 }
1200
1201 // Need to read ModRM to determine the actual instruction
1202 if offset >= bytes.len() {
1203 return Err(Error::InsufficientBytes);
1204 }
1205 let modrm = ModRm::from_byte(bytes[offset]);
1206 instr.modrm = Some(modrm.raw);
1207 offset += 1; // Advance past ModRM byte
1208
1209 // Set mnemonic based on ModRM.reg field
1210 instr.mnemonic = match modrm.reg {
1211 0 => Mnemonic::ROL,
1212 1 => Mnemonic::ROR,
1213 2 => Mnemonic::RCL,
1214 3 => Mnemonic::RCR,
1215 4 => Mnemonic::SHL, // Also SAL
1216 5 => Mnemonic::SHR,
1217 6 => Mnemonic::SHL, // Reserved in some docs, treated as SHL
1218 7 => Mnemonic::SAR,
1219 _ => Mnemonic::Invalid,
1220 };
1221 instr.category = InstructionCategory::ShiftRotate;
1222 instr.isa_set = IsaSet::I86;
1223 instr.operand_count = 2;
1224
1225 return Ok(offset);
1226 }
1227
1228 // Special handling for Group 3 opcodes (0xF6, 0xF7) - Unary operations
1229 // These opcodes use ModRM.reg to determine the actual instruction:
1230 // 0=TEST r/m, imm, 2=NOT r/m, 3=NEG r/m, 4=MUL r/m, 5=IMUL r/m, 6=DIV r/m, 7=IDIV r/m
1231 if first_byte == 0xF6 || first_byte == 0xF7 {
1232 // Mark as having ModRM
1233 instr.attributes |= InstructionAttributes::HAS_MODRM;
1234
1235 // Determine operand size: F6 is 8-bit, F7 is 16/32/64-bit
1236 let is_8bit = first_byte == 0xF6;
1237
1238 // Need to read ModRM to determine the actual instruction
1239 if offset >= bytes.len() {
1240 return Err(Error::InsufficientBytes);
1241 }
1242 let modrm = ModRm::from_byte(bytes[offset]);
1243 instr.modrm = Some(modrm.raw);
1244 offset += 1; // Advance past ModRM byte
1245
1246 // Set mnemonic based on ModRM.reg field
1247 // For reg=0 (TEST), we also need an immediate
1248 match modrm.reg {
1249 0 => {
1250 // TEST r/m, imm
1251 instr.mnemonic = Mnemonic::TEST;
1252 instr.category = InstructionCategory::Logical;
1253 if is_8bit {
1254 instr.attributes |= InstructionAttributes::HAS_IMM8;
1255 } else {
1256 instr.attributes |= InstructionAttributes::HAS_IMM;
1257 }
1258 }
1259 2 => {
1260 // NOT r/m
1261 instr.mnemonic = Mnemonic::NOT;
1262 instr.category = InstructionCategory::Logical;
1263 }
1264 3 => {
1265 // NEG r/m
1266 instr.mnemonic = Mnemonic::NEG;
1267 instr.category = InstructionCategory::Arithmetic;
1268 }
1269 4 => {
1270 // MUL r/m (unsigned)
1271 instr.mnemonic = Mnemonic::MUL;
1272 instr.category = InstructionCategory::Arithmetic;
1273 }
1274 5 => {
1275 // IMUL r/m (signed)
1276 instr.mnemonic = Mnemonic::IMUL;
1277 instr.category = InstructionCategory::Arithmetic;
1278 }
1279 6 => {
1280 // DIV r/m (unsigned)
1281 instr.mnemonic = Mnemonic::DIV;
1282 instr.category = InstructionCategory::Arithmetic;
1283 }
1284 7 => {
1285 // IDIV r/m (signed)
1286 instr.mnemonic = Mnemonic::IDIV;
1287 instr.category = InstructionCategory::Arithmetic;
1288 }
1289 _ => {
1290 instr.mnemonic = Mnemonic::Invalid;
1291 instr.category = InstructionCategory::Invalid;
1292 }
1293 }
1294 instr.isa_set = IsaSet::I86;
1295 instr.operand_count = if modrm.reg == 0 { 2 } else { 1 }; // TEST has 2 operands
1296
1297 return Ok(offset);
1298 }
1299
1300 // Special handling for LOOP instructions (0xE0, 0xE1, 0xE2)
1301 // These use RCX as a counter and have a rel8 displacement
1302 match first_byte {
1303 0xE0 => {
1304 // LOOPNE/LOOPNZ rel8
1305 instr.mnemonic = Mnemonic::LOOPNE;
1306 instr.category = InstructionCategory::ControlFlow;
1307 instr.isa_set = IsaSet::I86;
1308 instr.operand_count = 1; // rel8
1309 instr.attributes |= InstructionAttributes::HAS_IMM8 | InstructionAttributes::IS_RELATIVE | InstructionAttributes::IS_CONDITIONAL;
1310 return Ok(offset);
1311 }
1312 0xE1 => {
1313 // LOOPE/LOOPZ rel8
1314 instr.mnemonic = Mnemonic::LOOPE;
1315 instr.category = InstructionCategory::ControlFlow;
1316 instr.isa_set = IsaSet::I86;
1317 instr.operand_count = 1; // rel8
1318 instr.attributes |= InstructionAttributes::HAS_IMM8 | InstructionAttributes::IS_RELATIVE | InstructionAttributes::IS_CONDITIONAL;
1319 return Ok(offset);
1320 }
1321 0xE2 => {
1322 // LOOP rel8
1323 instr.mnemonic = Mnemonic::LOOP;
1324 instr.category = InstructionCategory::ControlFlow;
1325 instr.isa_set = IsaSet::I86;
1326 instr.operand_count = 1; // rel8
1327 instr.attributes |= InstructionAttributes::HAS_IMM8 | InstructionAttributes::IS_RELATIVE | InstructionAttributes::IS_CONDITIONAL;
1328 return Ok(offset);
1329 }
1330 _ => {}
1331 }
1332
1333 // Special handling for string instructions (A4, A5, AA-AF)
1334 // These are single-byte opcodes with implicit operands
1335 let has_rex_w = instr.prefixes.effective_w();
1336 match first_byte {
1337 0xA4 => {
1338 // MOVSB - Move byte from DS:(E)SI to ES:(E)DI
1339 instr.mnemonic = Mnemonic::MOVSB;
1340 instr.category = InstructionCategory::DataTransfer;
1341 instr.isa_set = IsaSet::I86;
1342 instr.operand_count = 0; // Implicit operands
1343 return Ok(offset);
1344 }
1345 0xA5 => {
1346 // MOVSW/MOVSD/MOVSQ - Move word/dword/qword
1347 // Size depends on operand size attribute and REX.W
1348 if has_rex_w {
1349 instr.mnemonic = Mnemonic::MOVSQ;
1350 } else if instr.prefixes.has_operand_size() {
1351 instr.mnemonic = Mnemonic::MOVSW;
1352 } else {
1353 instr.mnemonic = Mnemonic::MOVSD;
1354 }
1355 instr.category = InstructionCategory::DataTransfer;
1356 instr.isa_set = IsaSet::I86;
1357 instr.operand_count = 0;
1358 return Ok(offset);
1359 }
1360 0xAA => {
1361 // STOSB - Store byte from AL to ES:(E)DI
1362 instr.mnemonic = Mnemonic::STOSB;
1363 instr.category = InstructionCategory::DataTransfer;
1364 instr.isa_set = IsaSet::I86;
1365 instr.operand_count = 0;
1366 return Ok(offset);
1367 }
1368 0xAB => {
1369 // STOSW/STOSD/STOSQ - Store word/dword/qword
1370 if has_rex_w {
1371 instr.mnemonic = Mnemonic::STOSQ;
1372 } else if instr.prefixes.has_operand_size() {
1373 instr.mnemonic = Mnemonic::STOSW;
1374 } else {
1375 instr.mnemonic = Mnemonic::STOSD;
1376 }
1377 instr.category = InstructionCategory::DataTransfer;
1378 instr.isa_set = IsaSet::I86;
1379 instr.operand_count = 0;
1380 return Ok(offset);
1381 }
1382 0xAC => {
1383 // LODSB - Load byte from DS:(E)SI to AL
1384 instr.mnemonic = Mnemonic::LODSB;
1385 instr.category = InstructionCategory::DataTransfer;
1386 instr.isa_set = IsaSet::I86;
1387 instr.operand_count = 0;
1388 return Ok(offset);
1389 }
1390 0xAD => {
1391 // LODSW/LODSD/LODSQ - Load word/dword/qword
1392 if has_rex_w {
1393 instr.mnemonic = Mnemonic::LODSQ;
1394 } else if instr.prefixes.has_operand_size() {
1395 instr.mnemonic = Mnemonic::LODSW;
1396 } else {
1397 instr.mnemonic = Mnemonic::LODSD;
1398 }
1399 instr.category = InstructionCategory::DataTransfer;
1400 instr.isa_set = IsaSet::I86;
1401 instr.operand_count = 0;
1402 return Ok(offset);
1403 }
1404 0xAE => {
1405 // SCASB - Compare byte from AL with ES:(E)DI
1406 instr.mnemonic = Mnemonic::SCASB;
1407 instr.category = InstructionCategory::Arithmetic;
1408 instr.isa_set = IsaSet::I86;
1409 instr.operand_count = 0;
1410 return Ok(offset);
1411 }
1412 0xAF => {
1413 // SCASW/SCASD/SCASQ - Compare word/dword/qword
1414 if has_rex_w {
1415 instr.mnemonic = Mnemonic::SCASQ;
1416 } else if instr.prefixes.has_operand_size() {
1417 instr.mnemonic = Mnemonic::SCASW;
1418 } else {
1419 instr.mnemonic = Mnemonic::SCASD;
1420 }
1421 instr.category = InstructionCategory::Arithmetic;
1422 instr.isa_set = IsaSet::I86;
1423 instr.operand_count = 0;
1424 return Ok(offset);
1425 }
1426 _ => {}
1427 }
1428
1429 // Special handling for x87 FPU escape opcodes (0xD8-0xDF)
1430 // Each is followed by a ModRM byte; the mod and reg fields (and, for some register forms, rm) select the instruction
1431 if first_byte == 0xD9 {
1432 // D9 xx - x87 FPU instructions
1433 // Need to read ModRM to determine the actual instruction
1434 if offset >= bytes.len() {
1435 return Err(Error::InsufficientBytes);
1436 }
1437 let modrm = ModRm::from_byte(bytes[offset]);
1438 instr.modrm = Some(modrm.raw);
1439 instr.attributes |= InstructionAttributes::HAS_MODRM;
1440 offset += 1;
1441
1442 instr.category = InstructionCategory::SIMD;
1443 instr.isa_set = IsaSet::X87;
1444
1445 // D9 encoding:
1446 // - mod=11 (register): uses reg field to determine instruction
1447 // - mod!=11 (memory): uses reg field for operation, rm for addressing
1448 if modrm.mod_field == 3 {
1449 // Register form - instruction determined by reg and rm fields
1450 match modrm.reg {
1451 0 => {
1452 // FLD st(i) - Push st(i) onto FPU stack
1453 instr.mnemonic = Mnemonic::FLD;
1454 instr.operand_count = 1;
1455 }
1456 2 => {
1457 // FST st(i) - Store st(0) to st(i)
1458 instr.mnemonic = Mnemonic::FST;
1459 instr.operand_count = 1;
1460 }
1461 3 => {
1462 // FSTP st(i) - Store st(0) to st(i) and pop
1463 instr.mnemonic = Mnemonic::FSTP;
1464 instr.operand_count = 1;
1465 }
1466 4 => match modrm.rm {
1467 0 => {
1468 // FCHS - Change sign
1469 instr.mnemonic = Mnemonic::FCHS;
1470 instr.operand_count = 0;
1471 }
1472 1 => {
1473 // FABS - Absolute value
1474 instr.mnemonic = Mnemonic::FABS;
1475 instr.operand_count = 0;
1476 }
1477 4 => {
1478 // FTST - Test st(0)
1479 instr.mnemonic = Mnemonic::FTST;
1480 instr.operand_count = 0;
1481 }
1482 5 => {
1483 // FXAM - Examine st(0)
1484 instr.mnemonic = Mnemonic::FXAM;
1485 instr.operand_count = 0;
1486 }
1487 _ => {
1488 instr.mnemonic = Mnemonic::Invalid;
1489 }
1490 },
1491 5 => match modrm.rm {
1492 0 => {
1493 // FLD1 - Push +1.0
1494 instr.mnemonic = Mnemonic::FLD1;
1495 instr.operand_count = 0;
1496 }
1497 1 => {
1498 // FLDL2T - Push log2(10)
1499 instr.mnemonic = Mnemonic::FLDL2T;
1500 instr.operand_count = 0;
1501 }
1502 2 => {
1503 // FLDL2E - Push log2(e)
1504 instr.mnemonic = Mnemonic::FLDL2E;
1505 instr.operand_count = 0;
1506 }
1507 3 => {
1508 // FLDPI - Push pi
1509 instr.mnemonic = Mnemonic::FLDPI;
1510 instr.operand_count = 0;
1511 }
1512 4 => {
1513 // FLDLG2 - Push log10(2)
1514 instr.mnemonic = Mnemonic::FLDLG2;
1515 instr.operand_count = 0;
1516 }
1517 5 => {
1518 // FLDLN2 - Push ln(2)
1519 instr.mnemonic = Mnemonic::FLDLN2;
1520 instr.operand_count = 0;
1521 }
1522 6 => {
1523 // FLDZ - Push +0.0
1524 instr.mnemonic = Mnemonic::FLDZ;
1525 instr.operand_count = 0;
1526 }
1527 _ => {
1528 instr.mnemonic = Mnemonic::Invalid;
1529 }
1530 },
1531 6 => match modrm.rm {
1532 0 => {
1533 // F2XM1 - 2^x-1
1534 instr.mnemonic = Mnemonic::F2XM1;
1535 instr.operand_count = 0;
1536 }
1537 1 => {
1538 // FYL2X - y * log2(x)
1539 instr.mnemonic = Mnemonic::FYL2X;
1540 instr.operand_count = 0;
1541 }
1542 2 => {
1543 // FPTAN - Partial tangent
1544 instr.mnemonic = Mnemonic::FPTAN;
1545 instr.operand_count = 0;
1546 }
1547 3 => {
1548 // FPATAN - Partial arctangent
1549 instr.mnemonic = Mnemonic::FPATAN;
1550 instr.operand_count = 0;
1551 }
1552 4 => {
1553 // FXTRACT - Extract exponent and significand
1554 instr.mnemonic = Mnemonic::FXTRACT;
1555 instr.operand_count = 0;
1556 }
1557 5 => {
1558 // FPREM1 - Partial remainder (IEEE)
1559 instr.mnemonic = Mnemonic::FPREM1;
1560 instr.operand_count = 0;
1561 }
1562 6 => {
1563 // FDECSTP - Decrement stack pointer
1564 instr.mnemonic = Mnemonic::FDECSTP;
1565 instr.operand_count = 0;
1566 }
1567 7 => {
1568 // FINCSTP - Increment stack pointer
1569 instr.mnemonic = Mnemonic::FINCSTP;
1570 instr.operand_count = 0;
1571 }
1572 _ => {
1573 instr.mnemonic = Mnemonic::Invalid;
1574 }
1575 },
1576 7 => match modrm.rm {
1577 0 => {
1578 // FPREM - Partial remainder
1579 instr.mnemonic = Mnemonic::FPREM;
1580 instr.operand_count = 0;
1581 }
1582 1 => {
1583 // FYL2XP1 - y * log2(x+1)
1584 instr.mnemonic = Mnemonic::FYL2XP1;
1585 instr.operand_count = 0;
1586 }
1587 2 => {
1588 // FSQRT - Square root
1589 instr.mnemonic = Mnemonic::FSQRT;
1590 instr.operand_count = 0;
1591 }
1592 3 => {
1593 // FSINCOS - Sine and cosine
1594 instr.mnemonic = Mnemonic::FSINCOS;
1595 instr.operand_count = 0;
1596 }
1597 4 => {
1598 // FRNDINT - Round to integer
1599 instr.mnemonic = Mnemonic::FRNDINT;
1600 instr.operand_count = 0;
1601 }
1602 5 => {
1603 // FSCALE - Scale by power of 2
1604 instr.mnemonic = Mnemonic::FSCALE;
1605 instr.operand_count = 0;
1606 }
1607 6 => {
1608 // FSIN - Sine
1609 instr.mnemonic = Mnemonic::FSIN;
1610 instr.operand_count = 0;
1611 }
1612 7 => {
1613 // FCOS - Cosine
1614 instr.mnemonic = Mnemonic::FCOS;
1615 instr.operand_count = 0;
1616 }
1617 _ => {
1618 instr.mnemonic = Mnemonic::Invalid;
1619 }
1620 },
1621 _ => {
1622 instr.mnemonic = Mnemonic::Invalid;
1623 }
1624 }
1625 } else {
1626 // Memory form - determined by reg field
1627 match modrm.reg {
1628 0 => {
1629 // FLD m32 - Load real
1630 instr.mnemonic = Mnemonic::FLD;
1631 instr.operand_count = 1;
1632 }
1633 2 => {
1634 // FST m32 - Store real
1635 instr.mnemonic = Mnemonic::FST;
1636 instr.operand_count = 1;
1637 }
1638 3 => {
1639 // FSTP m32 - Store real and pop
1640 instr.mnemonic = Mnemonic::FSTP;
1641 instr.operand_count = 1;
1642 }
1643 4 => {
1644 // FLDENV - Load FPU environment
1645 instr.mnemonic = Mnemonic::FLDENV;
1646 instr.operand_count = 1;
1647 }
1648 5 => {
1649 // FLDCW - Load FPU control word
1650 instr.mnemonic = Mnemonic::FLDCW;
1651 instr.operand_count = 1;
1652 }
1653 6 => {
1654 // FSTENV - Store FPU environment
1655 instr.mnemonic = Mnemonic::FSTENV;
1656 instr.operand_count = 1;
1657 }
1658 7 => {
1659 // FSTCW - Store FPU control word
1660 instr.mnemonic = Mnemonic::FSTCW;
1661 instr.operand_count = 1;
1662 }
1663 _ => {
1664 instr.mnemonic = Mnemonic::Invalid;
1665 }
1666 }
1667 }
1668
1669 return Ok(offset);
1670 }
1671
1672 // D8 xx - x87 FPU arithmetic instructions (FADD, FSUB, FMUL, FDIV, FCOM)
1673 if first_byte == 0xD8 {
1674 if offset >= bytes.len() {
1675 return Err(Error::InsufficientBytes);
1676 }
1677 let modrm = ModRm::from_byte(bytes[offset]);
1678 instr.modrm = Some(modrm.raw);
1679 instr.attributes |= InstructionAttributes::HAS_MODRM;
1680 offset += 1;
1681
1682 instr.category = InstructionCategory::SIMD;
1683 instr.isa_set = IsaSet::X87;
1684
1685 if modrm.mod_field == 3 {
1686 // Register form - ST(0) op ST(i)
1687 match modrm.reg {
1688 0 => {
1689 // FADD ST(0), ST(i)
1690 instr.mnemonic = Mnemonic::FADD;
1691 instr.operand_count = 0; // Implicit operands
1692 }
1693 1 => {
1694 // FMUL ST(0), ST(i)
1695 instr.mnemonic = Mnemonic::FMUL;
1696 instr.operand_count = 0;
1697 }
1698 2 => {
1699 // FCOM ST(0), ST(i)
1700 instr.mnemonic = Mnemonic::FCOM;
1701 instr.operand_count = 0;
1702 }
1703 3 => {
1704 // FCOMP ST(0), ST(i)
1705 instr.mnemonic = Mnemonic::FCOMP;
1706 instr.operand_count = 0;
1707 }
1708 4 => {
1709 // FSUB ST(0), ST(i)
1710 instr.mnemonic = Mnemonic::FSUB;
1711 instr.operand_count = 0;
1712 }
1713 5 => {
1714 // FSUBR ST(0), ST(i)
1715 instr.mnemonic = Mnemonic::FSUBR;
1716 instr.operand_count = 0;
1717 }
1718 6 => {
1719 // FDIV ST(0), ST(i)
1720 instr.mnemonic = Mnemonic::FDIV;
1721 instr.operand_count = 0;
1722 }
1723 7 => {
1724 // FDIVR ST(0), ST(i)
1725 instr.mnemonic = Mnemonic::FDIVR;
1726 instr.operand_count = 0;
1727 }
1728 _ => {
1729 instr.mnemonic = Mnemonic::Invalid;
1730 }
1731 }
1732 } else {
1733 // Memory form - ST(0) op m32real
1734 match modrm.reg {
1735 0 => {
1736 // FADD m32real
1737 instr.mnemonic = Mnemonic::FADD;
1738 instr.operand_count = 1;
1739 }
1740 1 => {
1741 // FMUL m32real
1742 instr.mnemonic = Mnemonic::FMUL;
1743 instr.operand_count = 1;
1744 }
1745 2 => {
1746 // FCOM m32real
1747 instr.mnemonic = Mnemonic::FCOM;
1748 instr.operand_count = 1;
1749 }
1750 3 => {
1751 // FCOMP m32real
1752 instr.mnemonic = Mnemonic::FCOMP;
1753 instr.operand_count = 1;
1754 }
1755 4 => {
1756 // FSUB m32real
1757 instr.mnemonic = Mnemonic::FSUB;
1758 instr.operand_count = 1;
1759 }
1760 5 => {
1761 // FSUBR m32real
1762 instr.mnemonic = Mnemonic::FSUBR;
1763 instr.operand_count = 1;
1764 }
1765 6 => {
1766 // FDIV m32real
1767 instr.mnemonic = Mnemonic::FDIV;
1768 instr.operand_count = 1;
1769 }
1770 7 => {
1771 // FDIVR m32real
1772 instr.mnemonic = Mnemonic::FDIVR;
1773 instr.operand_count = 1;
1774 }
1775 _ => {
1776 instr.mnemonic = Mnemonic::Invalid;
1777 }
1778 }
1779 }
1780
1781 return Ok(offset);
1782 }
1783
1784 // DC xx - x87 FPU arithmetic instructions (memory form: m64real; register form: ST(i) op ST(0))
1785 if first_byte == 0xDC {
1786 if offset >= bytes.len() {
1787 return Err(Error::InsufficientBytes);
1788 }
1789 let modrm = ModRm::from_byte(bytes[offset]);
1790 instr.modrm = Some(modrm.raw);
1791 instr.attributes |= InstructionAttributes::HAS_MODRM;
1792 offset += 1;
1793
1794 instr.category = InstructionCategory::SIMD;
1795 instr.isa_set = IsaSet::X87;
1796
1797 if modrm.mod_field == 3 {
1798 // Register form - ST(i) op= ST(0)
1799 match modrm.reg {
1800 0 => {
1801 // FADD ST(i), ST(0)
1802 instr.mnemonic = Mnemonic::FADD;
1803 instr.operand_count = 0;
1804 }
1805 1 => {
1806 // FMUL ST(i), ST(0)
1807 instr.mnemonic = Mnemonic::FMUL;
1808 instr.operand_count = 0;
1809 }
1810 4 => {
1811 // FSUBR ST(i), ST(0)
1812 instr.mnemonic = Mnemonic::FSUBR;
1813 instr.operand_count = 0;
1814 }
1815 5 => {
1816 // FSUB ST(i), ST(0)
1817 instr.mnemonic = Mnemonic::FSUB;
1818 instr.operand_count = 0;
1819 }
1820 6 => {
1821 // FDIVR ST(i), ST(0)
1822 instr.mnemonic = Mnemonic::FDIVR;
1823 instr.operand_count = 0;
1824 }
1825 7 => {
1826 // FDIV ST(i), ST(0)
1827 instr.mnemonic = Mnemonic::FDIV;
1828 instr.operand_count = 0;
1829 }
1830 _ => {
1831 instr.mnemonic = Mnemonic::Invalid;
1832 }
1833 }
1834 } else {
1835 // Memory form - ST(0) op m64real
1836 match modrm.reg {
1837 0 => {
1838 // FADD m64real
1839 instr.mnemonic = Mnemonic::FADD;
1840 instr.operand_count = 1;
1841 }
1842 1 => {
1843 // FMUL m64real
1844 instr.mnemonic = Mnemonic::FMUL;
1845 instr.operand_count = 1;
1846 }
1847 2 => {
1848 // FCOM m64real
1849 instr.mnemonic = Mnemonic::FCOM;
1850 instr.operand_count = 1;
1851 }
1852 3 => {
1853 // FCOMP m64real
1854 instr.mnemonic = Mnemonic::FCOMP;
1855 instr.operand_count = 1;
1856 }
1857 4 => {
1858 // FSUB m64real
1859 instr.mnemonic = Mnemonic::FSUB;
1860 instr.operand_count = 1;
1861 }
1862 5 => {
1863 // FSUBR m64real
1864 instr.mnemonic = Mnemonic::FSUBR;
1865 instr.operand_count = 1;
1866 }
1867 6 => {
1868 // FDIV m64real
1869 instr.mnemonic = Mnemonic::FDIV;
1870 instr.operand_count = 1;
1871 }
1872 7 => {
1873 // FDIVR m64real
1874 instr.mnemonic = Mnemonic::FDIVR;
1875 instr.operand_count = 1;
1876 }
1877 _ => {
1878 instr.mnemonic = Mnemonic::Invalid;
1879 }
1880 }
1881 }
1882
1883 return Ok(offset);
1884 }
1885
1886 // DE xx - x87 FPU arithmetic instructions (pop)
1887 if first_byte == 0xDE {
1888 if offset >= bytes.len() {
1889 return Err(Error::InsufficientBytes);
1890 }
1891 let modrm = ModRm::from_byte(bytes[offset]);
1892 instr.modrm = Some(modrm.raw);
1893 instr.attributes |= InstructionAttributes::HAS_MODRM;
1894 offset += 1;
1895
1896 instr.category = InstructionCategory::SIMD;
1897 instr.isa_set = IsaSet::X87;
1898
1899 if modrm.mod_field == 3 {
1900 // Register form - ST(i) op= ST(0) and pop
1901 match modrm.reg {
1902 0 => {
1903 // FADDP ST(i), ST(0)
1904 instr.mnemonic = Mnemonic::FADDP;
1905 instr.operand_count = 0;
1906 }
1907 1 => {
1908 // FMULP ST(i), ST(0)
1909 instr.mnemonic = Mnemonic::FMULP;
1910 instr.operand_count = 0;
1911 }
1912 4 => {
1913 // FSUBRP ST(i), ST(0)
1914 instr.mnemonic = Mnemonic::FSUBRP;
1915 instr.operand_count = 0;
1916 }
1917 5 => {
1918 // FSUBP ST(i), ST(0)
1919 instr.mnemonic = Mnemonic::FSUBP;
1920 instr.operand_count = 0;
1921 }
1922 6 => {
1923 // FDIVRP ST(i), ST(0)
1924 instr.mnemonic = Mnemonic::FDIVRP;
1925 instr.operand_count = 0;
1926 }
1927 7 => {
1928 // FDIVP ST(i), ST(0)
1929 instr.mnemonic = Mnemonic::FDIVP;
1930 instr.operand_count = 0;
1931 }
1932 _ => {
1933 instr.mnemonic = Mnemonic::Invalid;
1934 }
1935 }
1936 } else {
1937 // Memory form - FIADD m16int, etc. (rare)
1938 match modrm.reg {
1939 0 => {
1940 // FIADD m16int
1941 instr.mnemonic = Mnemonic::FIADD;
1942 instr.operand_count = 1;
1943 }
1944 1 => {
1945 // FIMUL m16int
1946 instr.mnemonic = Mnemonic::FIMUL;
1947 instr.operand_count = 1;
1948 }
1949 2 => {
1950 // FICOM m16int
1951 instr.mnemonic = Mnemonic::FICOM;
1952 instr.operand_count = 1;
1953 }
1954 3 => {
1955 // FICOMP m16int
1956 instr.mnemonic = Mnemonic::FICOMP;
1957 instr.operand_count = 1;
1958 }
1959 4 => {
1960 // FISUB m16int
1961 instr.mnemonic = Mnemonic::FISUB;
1962 instr.operand_count = 1;
1963 }
1964 5 => {
1965 // FISUBR m16int
1966 instr.mnemonic = Mnemonic::FISUBR;
1967 instr.operand_count = 1;
1968 }
1969 6 => {
1970 // FIDIV m16int
1971 instr.mnemonic = Mnemonic::FIDIV;
1972 instr.operand_count = 1;
1973 }
1974 7 => {
1975 // FIDIVR m16int
1976 instr.mnemonic = Mnemonic::FIDIVR;
1977 instr.operand_count = 1;
1978 }
1979 _ => {
1980 instr.mnemonic = Mnemonic::Invalid;
1981 }
1982 }
1983 }
1984
1985 return Ok(offset);
1986 }
1987
1988 // DD xx - x87 FPU load/store (m64real)
1989 if first_byte == 0xDD {
1990 if offset >= bytes.len() {
1991 return Err(Error::InsufficientBytes);
1992 }
1993 let modrm = ModRm::from_byte(bytes[offset]);
1994 instr.modrm = Some(modrm.raw);
1995 instr.attributes |= InstructionAttributes::HAS_MODRM;
1996 offset += 1;
1997
1998 instr.category = InstructionCategory::SIMD;
1999 instr.isa_set = IsaSet::X87;
2000
2001 if modrm.mod_field == 3 {
2002 // Register form - FFREE, FXCH, FST, FSTP
2003 match modrm.reg {
2004 0 => {
2005 // FFREE ST(i)
2006 instr.mnemonic = Mnemonic::FFREE;
2007 instr.operand_count = 0;
2008 }
2009 1 => {
2010 // FXCH ST(i)
2011 instr.mnemonic = Mnemonic::FXCH;
2012 instr.operand_count = 0;
2013 }
2014 2 => {
2015 // FST ST(i)
2016 instr.mnemonic = Mnemonic::FST;
2017 instr.operand_count = 0;
2018 }
2019 3 => {
2020 // FSTP ST(i)
2021 instr.mnemonic = Mnemonic::FSTP;
2022 instr.operand_count = 0;
2023 }
2024 _ => {
2025 instr.mnemonic = Mnemonic::Invalid;
2026 }
2027 }
2028 } else {
2029 // Memory form
2030 match modrm.reg {
2031 0 => {
2032 // FLD m64real
2033 instr.mnemonic = Mnemonic::FLD;
2034 instr.operand_count = 1;
2035 }
2036 2 => {
2037 // FST m64real
2038 instr.mnemonic = Mnemonic::FST;
2039 instr.operand_count = 1;
2040 }
2041 3 => {
2042 // FSTP m64real
2043 instr.mnemonic = Mnemonic::FSTP;
2044 instr.operand_count = 1;
2045 }
2046 4 => {
2047 // FRSTOR m94/108byte
2048 instr.mnemonic = Mnemonic::FRSTOR;
2049 instr.operand_count = 1;
2050 }
2051 6 => {
2052 // FSAVE m94/108byte
2053 instr.mnemonic = Mnemonic::FSAVE;
2054 instr.operand_count = 1;
2055 }
2056 7 => {
2057 // FSTSW m16
2058 instr.mnemonic = Mnemonic::FSTSW;
2059 instr.operand_count = 1;
2060 }
2061 _ => {
2062 instr.mnemonic = Mnemonic::Invalid;
2063 }
2064 }
2065 }
2066
2067 return Ok(offset);
2068 }
2069
2070 // DB xx - x87 FPU (register form: FCMOVNcc, etc.; memory form: m32int and m80real load/store)
2071 if first_byte == 0xDB {
2072 if offset >= bytes.len() {
2073 return Err(Error::InsufficientBytes);
2074 }
2075 let modrm = ModRm::from_byte(bytes[offset]);
2076 instr.modrm = Some(modrm.raw);
2077 instr.attributes |= InstructionAttributes::HAS_MODRM;
2078 offset += 1;
2079
2080 instr.category = InstructionCategory::SIMD;
2081 instr.isa_set = IsaSet::X87;
2082
2083 if modrm.mod_field == 3 {
2084 // Register form
2085 match modrm.reg {
2086 0 => {
2087 // FCMOVNB ST(0), ST(i)
2088 instr.mnemonic = Mnemonic::FCMOVNB;
2089 instr.operand_count = 0;
2090 }
2091 1 => {
2092 // FCMOVNE ST(0), ST(i)
2093 instr.mnemonic = Mnemonic::FCMOVNE;
2094 instr.operand_count = 0;
2095 }
2096 2 => {
2097 // FCMOVNBE ST(0), ST(i)
2098 instr.mnemonic = Mnemonic::FCMOVNBE;
2099 instr.operand_count = 0;
2100 }
2101 3 => {
2102 // FCMOVNU ST(0), ST(i)
2103 instr.mnemonic = Mnemonic::FCMOVNU;
2104 instr.operand_count = 0;
2105 }
2106 4 => match modrm.rm {
2107 5 => {
2108 // FUCOMPP
2109 instr.mnemonic = Mnemonic::FUCOMPP;
2110 instr.operand_count = 0;
2111 }
2112 _ => {
2113 instr.mnemonic = Mnemonic::Invalid;
2114 }
2115 }
2116 _ => {
2117 instr.mnemonic = Mnemonic::Invalid;
2118 }
2119 }
2120 } else {
2121 // Memory form
2122 match modrm.reg {
2123 0 => {
2124 // FILD m32int
2125 instr.mnemonic = Mnemonic::FILD;
2126 instr.operand_count = 1;
2127 }
2128 2 => {
2129 // FIST m32int
2130 instr.mnemonic = Mnemonic::FIST;
2131 instr.operand_count = 1;
2132 }
2133 3 => {
2134 // FISTP m32int
2135 instr.mnemonic = Mnemonic::FISTP;
2136 instr.operand_count = 1;
2137 }
2138 5 => {
2139 // FLD m80real
2140 instr.mnemonic = Mnemonic::FLD;
2141 instr.operand_count = 1;
2142 }
2143 7 => {
2144 // FSTP m80real
2145 instr.mnemonic = Mnemonic::FSTP;
2146 instr.operand_count = 1;
2147 }
2148 _ => {
2149 instr.mnemonic = Mnemonic::Invalid;
2150 }
2151 }
2152 }
2153
2154 return Ok(offset);
2155 }
2156
2157 // DA xx - x87 FPU (register form: FCMOVcc; memory form: FIADD m32int, etc.)
2158 if first_byte == 0xDA {
2159 if offset >= bytes.len() {
2160 return Err(Error::InsufficientBytes);
2161 }
2162 let modrm = ModRm::from_byte(bytes[offset]);
2163 instr.modrm = Some(modrm.raw);
2164 instr.attributes |= InstructionAttributes::HAS_MODRM;
2165 offset += 1;
2166
2167 instr.category = InstructionCategory::SIMD;
2168 instr.isa_set = IsaSet::X87;
2169
2170 if modrm.mod_field == 3 {
2171 // Register form - FCMOVcc
2172 match modrm.reg {
2173 0 => {
2174 // FCMOVB ST(0), ST(i)
2175 instr.mnemonic = Mnemonic::FCMOVB;
2176 instr.operand_count = 0;
2177 }
2178 1 => {
2179 // FCMOVE ST(0), ST(i)
2180 instr.mnemonic = Mnemonic::FCMOVE;
2181 instr.operand_count = 0;
2182 }
2183 2 => {
2184 // FCMOVBE ST(0), ST(i)
2185 instr.mnemonic = Mnemonic::FCMOVBE;
2186 instr.operand_count = 0;
2187 }
2188 3 => {
2189 // FCMOVU ST(0), ST(i)
2190 instr.mnemonic = Mnemonic::FCMOVU;
2191 instr.operand_count = 0;
2192 }
2193 _ => {
2194 instr.mnemonic = Mnemonic::Invalid;
2195 }
2196 }
2197 } else {
2198 // Memory form - FIADD m32int, etc
2199 match modrm.reg {
2200 0 => {
2201 instr.mnemonic = Mnemonic::FIADD;
2202 instr.operand_count = 1;
2203 }
2204 1 => {
2205 instr.mnemonic = Mnemonic::FIMUL;
2206 instr.operand_count = 1;
2207 }
2208 2 => {
2209 instr.mnemonic = Mnemonic::FICOM;
2210 instr.operand_count = 1;
2211 }
2212 3 => {
2213 instr.mnemonic = Mnemonic::FICOMP;
2214 instr.operand_count = 1;
2215 }
2216 4 => {
2217 instr.mnemonic = Mnemonic::FISUB;
2218 instr.operand_count = 1;
2219 }
2220 5 => {
2221 instr.mnemonic = Mnemonic::FISUBR;
2222 instr.operand_count = 1;
2223 }
2224 6 => {
2225 instr.mnemonic = Mnemonic::FIDIV;
2226 instr.operand_count = 1;
2227 }
2228 7 => {
2229 instr.mnemonic = Mnemonic::FIDIVR;
2230 instr.operand_count = 1;
2231 }
2232 _ => {
2233 instr.mnemonic = Mnemonic::Invalid;
2234 }
2235 }
2236 }
2237
2238 return Ok(offset);
2239 }
2240
2241 // DF xx - x87 FPU miscellaneous (FISTP m64int, etc)
2242 if first_byte == 0xDF {
2243 if offset >= bytes.len() {
2244 return Err(Error::InsufficientBytes);
2245 }
2246 let modrm = ModRm::from_byte(bytes[offset]);
2247 instr.modrm = Some(modrm.raw);
2248 instr.attributes |= InstructionAttributes::HAS_MODRM;
2249 offset += 1;
2250
2251 instr.category = InstructionCategory::SIMD;
2252 instr.isa_set = IsaSet::X87;
2253
2254 if modrm.mod_field == 3 {
2255 // Register form
2256 match modrm.reg {
2257 4 => match modrm.rm {
2258 0 => {
2259 // FNSTSW AX
2260 instr.mnemonic = Mnemonic::FNSTSW;
2261 instr.operand_count = 0;
2262 }
2263 _ => {
2264 instr.mnemonic = Mnemonic::Invalid;
2265 }
2266 }
2267 5 => {
2268 // FUCOMI ST(0), ST(i)
2269 instr.mnemonic = Mnemonic::FUCOMI;
2270 instr.operand_count = 0;
2271 }
2272 6 => {
2273 // FCOMI ST(0), ST(i)
2274 instr.mnemonic = Mnemonic::FCOMI;
2275 instr.operand_count = 0;
2276 }
2277 _ => {
2278 instr.mnemonic = Mnemonic::Invalid;
2279 }
2280 }
2281 } else {
2282 // Memory form
2283 match modrm.reg {
2284 0 => {
2285 // FILD m16int
2286 instr.mnemonic = Mnemonic::FILD;
2287 instr.operand_count = 1;
2288 }
2289 1 => {
2290 // FISTTP m16int
2291 instr.mnemonic = Mnemonic::FISTTP;
2292 instr.operand_count = 1;
2293 }
2294 2 => {
2295 // FIST m16int
2296 instr.mnemonic = Mnemonic::FIST;
2297 instr.operand_count = 1;
2298 }
2299 3 => {
2300 // FISTP m16int
2301 instr.mnemonic = Mnemonic::FISTP;
2302 instr.operand_count = 1;
2303 }
2304 4 => {
2305 // FBLD m80dec
2306 instr.mnemonic = Mnemonic::FBLD;
2307 instr.operand_count = 1;
2308 }
2309 5 => {
2310 // FILD m64int
2311 instr.mnemonic = Mnemonic::FILD;
2312 instr.operand_count = 1;
2313 }
2314 6 => {
2315 // FBSTP m80dec
2316 instr.mnemonic = Mnemonic::FBSTP;
2317 instr.operand_count = 1;
2318 }
2319 7 => {
2320 // FISTP m64int
2321 instr.mnemonic = Mnemonic::FISTP;
2322 instr.operand_count = 1;
2323 }
2324 _ => {
2325 instr.mnemonic = Mnemonic::Invalid;
2326 }
2327 }
2328 }
2329
2330 return Ok(offset);
2331 }
2332
2333 if let Some(def) = instructions_core::lookup_by_opcode(instr.opcode) {
2334 instr.mnemonic = def.mnemonic;
2335 instr.category = def.category;
2336 instr.isa_set = def.isa_set;
2337 instr.attributes = def.attributes;
2338 instr.operand_count = def.operand_count;
2339 } else {
2340 // Unknown opcode
2341 instr.mnemonic = Mnemonic::Invalid;
2342 instr.category = InstructionCategory::Invalid;
2343 }
2344
2345 // Special handling for XCHG RAX, r64 (0x90-0x97)
2346 // 0x90 with no REX.B is NOP, but with REX.B it's XCHG RAX, R8
2347 // 0x91-0x97 are always XCHG RAX, r64
2348 if (0x90..=0x97).contains(&instr.opcode) {
2349 let rex_b = instr.prefixes.effective_b();
2350 if instr.opcode != 0x90 || rex_b {
2351 // Either not 0x90, or 0x90 with REX.B prefix
2352 instr.mnemonic = Mnemonic::XCHG;
2353 instr.category = InstructionCategory::DataTransfer;
2354 instr.isa_set = IsaSet::I86;
2355 instr.operand_count = 2;
2356 }
2357 }
2358
2359 Ok(offset)
2360 }
2361
2362 /// Get the mandatory prefix from decoded prefixes
2363 fn get_mandatory_prefix(instr: &DecodedInstruction) -> opcode::MandatoryPrefix {
2364 let has_66 = instr.prefixes.has_operand_size();
2365 let has_f2 = instr.prefixes.has_repne();
2366 let has_f3 = instr.prefixes.has_rep();
2367
2368 if has_f2 {
2369 opcode::MandatoryPrefix::PF2
2370 } else if has_f3 {
2371 opcode::MandatoryPrefix::PF3
2372 } else if has_66 {
2373 opcode::MandatoryPrefix::P66
2374 } else {
2375 opcode::MandatoryPrefix::None
2376 }
2377 }
2378
2379 /// Apply extended instruction definition to decoded instruction
2380 fn apply_extended_instruction_def(
2381 instr: &mut DecodedInstruction,
2382 def: &instructions_core::ExtendedInstructionDef,
2383 ) {
2384 instr.mnemonic = def.base.mnemonic;
2385 instr.category = def.base.category;
2386 instr.isa_set = def.base.isa_set;
2387 instr.attributes = def.base.attributes;
2388 instr.operand_count = def.base.operand_count;
2389
2390 // If has_imm8 is set, ensure HAS_IMM8 attribute is present
2391 if def.has_imm8 {
2392 instr.attributes |= InstructionAttributes::HAS_IMM8;
2393 }
2394 }
2395
2396 /// Parse ModRM byte
2397 fn parse_modrm(
2398 &self,
2399 bytes: &[u8],
2400 mut offset: usize,
2401 instr: &mut DecodedInstruction,
2402 ) -> Result<usize> {
2403 // Check if instruction needs ModRM and it hasn't been parsed yet
2404 if instr.attributes.contains(InstructionAttributes::HAS_MODRM) && instr.modrm.is_none() {
2405 if offset >= bytes.len() {
2406 return Err(Error::InsufficientBytes);
2407 }
2408
2409 let modrm = ModRm::from_byte(bytes[offset]);
2410 instr.modrm = Some(modrm.raw);
2411 offset += 1;
2412 }
2413
2414 Ok(offset)
2415 }
2416
2417 /// Parse SIB byte
2418 fn parse_sib(
2419 &self,
2420 bytes: &[u8],
2421 mut offset: usize,
2422 instr: &mut DecodedInstruction,
2423 ) -> Result<usize> {
2424 if let Some(modrm_byte) = instr.modrm {
2425 let modrm = ModRm::from_byte(modrm_byte);
2426
2427 // Check if SIB is needed
2428 if modrm.has_sib() {
2429 if offset >= bytes.len() {
2430 return Err(Error::InsufficientBytes);
2431 }
2432
2433 instr.sib = Some(bytes[offset]);
2434 offset += 1;
2435 }
2436 }
2437
2438 Ok(offset)
2439 }
2440
2441 /// Parse displacement
2442 fn parse_displacement(
2443 &self,
2444 bytes: &[u8],
2445 mut offset: usize,
2446 instr: &mut DecodedInstruction,
2447 ) -> Result<usize> {
2448 // Determine displacement size
2449 let disp_size = if instr.attributes.contains(InstructionAttributes::HAS_DISP) {
2450 // For relative branches, typically 32-bit displacement
2451 4u8
2452 } else if let Some(modrm_byte) = instr.modrm {
2453 let modrm = ModRm::from_byte(modrm_byte);
2454 if let Some(sib_byte) = instr.sib {
2455 let sib = Sib::from_byte(sib_byte);
2456 modrm::sib_disp_size(modrm.mod_field, sib.base)
2457 } else {
2458 modrm.disp_size()
2459 }
2460 } else {
2461 0
2462 };
2463
2464 if disp_size > 0 {
2465 if let Some((disp, new_offset)) = immediate::extract_disp(bytes, offset, disp_size) {
2466 instr.displacement = disp;
2467 instr.disp_size = disp_size;
2468 offset = new_offset;
2469 } else {
2470 return Err(Error::InsufficientBytes);
2471 }
2472 }
2473
2474 Ok(offset)
2475 }
2476
2477 /// Parse immediate
2478 fn parse_immediate(
2479 &self,
2480 bytes: &[u8],
2481 mut offset: usize,
2482 instr: &mut DecodedInstruction,
2483 ) -> Result<usize> {
2484 // Check if instruction has immediate
2485 if instr.attributes.contains(InstructionAttributes::HAS_IMM)
2486 || instr.attributes.contains(InstructionAttributes::HAS_IMM8)
2487 {
2488 // Determine immediate size based on instruction
2489 let imm_size = if instr.attributes.contains(InstructionAttributes::HAS_IMM8) {
2490 // 8-bit immediate (for VEX shuffle instructions, etc.)
2491 1
2492 } else if instr.opcode >= 0xB8 && instr.opcode <= 0xBF {
2493 // MOV r, imm - size depends on REX.W prefix and mode
2494 if self.is_64_bit && instr.prefixes.effective_w() {
2495 8 // MOV r64, imm64 (REX.W + B8+rd)
2496 } else {
2497 4 // MOV r32, imm32 (B8+rd) or MOV r16, imm16 (with 66 prefix)
2498 }
2499 } else {
2500 4 // With REX.W or default 32-bit
2501 };
2502
2503 if let Some((imm, new_offset)) = immediate::extract_imm(bytes, offset, imm_size) {
2504 instr.immediate = imm;
2505 instr.imm_size = imm_size;
2506 offset = new_offset;
2507 } else {
2508 return Err(Error::InsufficientBytes);
2509 }
2510 }
2511
2512 Ok(offset)
2513 }
2514
2515 /// Build operand information
2516 fn build_operands(&self, instr: &mut DecodedInstruction) -> Result<()> {
2517 // Handle EVEX-encoded instructions (AVX-512) separately
2518 if instr.prefixes.has_evex() {
2519 return self.build_evex_operands(instr);
2520 }
2521
2522 // Handle VEX-encoded instructions separately
2523 if instr.prefixes.has_vex() {
2524 return self.build_vex_operands(instr);
2525 }
2526
2527 let rex_r = instr.prefixes.effective_r();
2528 let rex_b = instr.prefixes.effective_b();
2529 let rex_x = instr.prefixes.effective_x();
2530 let rex_w = instr.prefixes.effective_w();
2531
2532 // Special handling for 0F 01 system instructions (SGDT, SIDT, LGDT, LIDT, SMSW, LMSW, INVLPG, etc.)
2533 // These instructions have specific operand types that don't follow standard ModRM patterns
2534 if instr.opcode_map == OpcodeMap::Map0F && instr.opcode == 0x01 {
2535 if let Some(modrm_byte) = instr.modrm {
2536 let modrm = ModRm::from_byte(modrm_byte);
2537
2538 match instr.mnemonic {
2539 Mnemonic::SGDT | Mnemonic::SIDT => {
2540 // Store descriptor table: OP m - stores 6 bytes to memory
2541 if !modrm.is_register() {
2542 let sib = instr.sib.map(Sib::from_byte);
2543 let mem = address::build_mem_operand(
2544 &modrm,
2545 sib.as_ref(),
2546 instr.displacement,
2547 instr.prefixes.segment(),
2548 rex_b,
2549 rex_x,
2550 );
2551 instr.operands[0] = Operand::memory(OperandSize::Size48, OperandAccess::Write, mem);
2552 instr.operand_count = 1;
2553 }
2554 return Ok(());
2555 }
2556 Mnemonic::LGDT | Mnemonic::LIDT => {
2557 // Load descriptor table: OP m - loads from 6 bytes (32-bit) or 10 bytes (64-bit)
2558 if !modrm.is_register() {
2559 let sib = instr.sib.map(Sib::from_byte);
2560 let mem = address::build_mem_operand(
2561 &modrm,
2562 sib.as_ref(),
2563 instr.displacement,
2564 instr.prefixes.segment(),
2565 rex_b,
2566 rex_x,
2567 );
2568 // Size depends on mode: 6 bytes in legacy mode, 10 bytes in 64-bit mode
2569 let size = if self.is_64_bit { OperandSize::Size80 } else { OperandSize::Size48 };
2570 instr.operands[0] = Operand::memory(size, OperandAccess::Read, mem);
2571 instr.operand_count = 1;
2572 }
2573 return Ok(());
2574 }
2575 Mnemonic::SMSW => {
2576 // SMSW: OP r/m16 - store machine status word
2577 if modrm.is_register() {
2578 instr.operands[0] = Operand::register(
2579 modrm.rm_register(rex_b, false),
2580 OperandSize::Size16,
2581 OperandAccess::Write,
2582 );
2583 } else {
2584 let sib = instr.sib.map(Sib::from_byte);
2585 let mem = address::build_mem_operand(
2586 &modrm,
2587 sib.as_ref(),
2588 instr.displacement,
2589 instr.prefixes.segment(),
2590 rex_b,
2591 rex_x,
2592 );
2593 instr.operands[0] = Operand::memory(OperandSize::Size16, OperandAccess::Write, mem);
2594 }
2595 instr.operand_count = 1;
2596 return Ok(());
2597 }
2598 Mnemonic::LMSW => {
2599 // LMSW: OP r/m16 - load machine status word
2600 if modrm.is_register() {
2601 instr.operands[0] = Operand::register(
2602 modrm.rm_register(rex_b, false),
2603 OperandSize::Size16,
2604 OperandAccess::Read,
2605 );
2606 } else {
2607 let sib = instr.sib.map(Sib::from_byte);
2608 let mem = address::build_mem_operand(
2609 &modrm,
2610 sib.as_ref(),
2611 instr.displacement,
2612 instr.prefixes.segment(),
2613 rex_b,
2614 rex_x,
2615 );
2616 instr.operands[0] = Operand::memory(OperandSize::Size16, OperandAccess::Read, mem);
2617 }
2618 instr.operand_count = 1;
2619 return Ok(());
2620 }
2621 Mnemonic::INVLPG => {
2622 // INVLPG: OP m - invalidate TLB entry
2623 if !modrm.is_register() {
2624 let sib = instr.sib.map(Sib::from_byte);
2625 let mem = address::build_mem_operand(
2626 &modrm,
2627 sib.as_ref(),
2628 instr.displacement,
2629 instr.prefixes.segment(),
2630 rex_b,
2631 rex_x,
2632 );
2633 instr.operands[0] = Operand::memory(OperandSize::Size8, OperandAccess::Read, mem);
2634 instr.operand_count = 1;
2635 }
2636 return Ok(());
2637 }
2638 Mnemonic::SWAPGS | Mnemonic::RDTSCP | Mnemonic::CLAC | Mnemonic::STAC => {
2639 // These instructions have no operands
2640 instr.operand_count = 0;
2641 return Ok(());
2642 }
2643 _ => {}
2644 }
2645 }
2646 }
2647
2648 // Special handling for RDTSC (0F 31)
2649 if instr.opcode_map == OpcodeMap::Map0F && instr.opcode == 0x31 {
2650 instr.operand_count = 0;
2651 return Ok(());
2652 }
2653
2654 // Special handling for RDPMC (0F 33)
2655 if instr.opcode_map == OpcodeMap::Map0F && instr.opcode == 0x33 {
2656 instr.operand_count = 0;
2657 return Ok(());
2658 }
2659
2660 // Special handling for 0F AE group (FXSAVE, FXRSTOR, LDMXCSR, STMXCSR, CLFLUSH, etc.)
2661 if instr.opcode_map == OpcodeMap::Map0F && instr.opcode == 0xAE {
2662 if let Some(modrm_byte) = instr.modrm {
2663 let modrm = ModRm::from_byte(modrm_byte);
2664 let sib = instr.sib.map(Sib::from_byte);
2665
2666 match instr.mnemonic {
2667 Mnemonic::FXSAVE | Mnemonic::FXRSTOR => {
2668 // 512-byte memory operand
2669 if !modrm.is_register() {
2670 let mem = address::build_mem_operand(
2671 &modrm, sib.as_ref(), instr.displacement,
2672 instr.prefixes.segment(), rex_b, rex_x,
2673 );
2674 let access = if instr.mnemonic == Mnemonic::FXSAVE {
2675 OperandAccess::Write
2676 } else {
2677 OperandAccess::Read
2678 };
2679 instr.operands[0] = Operand::memory(OperandSize::Size512, access, mem);
2680 instr.operand_count = 1;
2681 }
2682 return Ok(());
2683 }
2684 Mnemonic::LDMXCSR | Mnemonic::STMXCSR => {
2685 // 32-bit memory operand
2686 if !modrm.is_register() {
2687 let mem = address::build_mem_operand(
2688 &modrm, sib.as_ref(), instr.displacement,
2689 instr.prefixes.segment(), rex_b, rex_x,
2690 );
2691 let access = if instr.mnemonic == Mnemonic::LDMXCSR {
2692 OperandAccess::Read
2693 } else {
2694 OperandAccess::Write
2695 };
2696 instr.operands[0] = Operand::memory(OperandSize::Size32, access, mem);
2697 instr.operand_count = 1;
2698 }
2699 return Ok(());
2700 }
2701 Mnemonic::CLFLUSH | Mnemonic::CLFLUSHOPT | Mnemonic::CLWB => {
2702 // 8-bit memory operand (cache line)
2703 if !modrm.is_register() {
2704 let mem = address::build_mem_operand(
2705 &modrm, sib.as_ref(), instr.displacement,
2706 instr.prefixes.segment(), rex_b, rex_x,
2707 );
2708 instr.operands[0] = Operand::memory(OperandSize::Size8, OperandAccess::Read, mem);
2709 instr.operand_count = 1;
2710 }
2711 return Ok(());
2712 }
2713 Mnemonic::XSAVE | Mnemonic::XRSTOR | Mnemonic::XSAVEOPT => {
2714 // Memory operand for XSAVE area
2715 if !modrm.is_register() {
2716 let mem = address::build_mem_operand(
2717 &modrm, sib.as_ref(), instr.displacement,
2718 instr.prefixes.segment(), rex_b, rex_x,
2719 );
2720 instr.operands[0] = Operand::memory(OperandSize::Size64, OperandAccess::ReadWrite, mem);
2721 instr.operand_count = 1;
2722 }
2723 return Ok(());
2724 }
2725 _ => {}
2726 }
2727 }
2728 }
2729
2730 // Special handling for 0F 0D group (PREFETCH, PREFETCHW, PREFETCHWT1)
2731 if instr.opcode_map == OpcodeMap::Map0F && instr.opcode == 0x0D {
2732 if let Some(modrm_byte) = instr.modrm {
2733 let modrm = ModRm::from_byte(modrm_byte);
2734 // All prefetch instructions have a single memory operand
2735 if !modrm.is_register() {
2736 let sib = instr.sib.map(Sib::from_byte);
2737 let mem = address::build_mem_operand(
2738 &modrm, sib.as_ref(), instr.displacement,
2739 instr.prefixes.segment(), rex_b, rex_x,
2740 );
2741 instr.operands[0] = Operand::memory(OperandSize::Size8, OperandAccess::Read, mem);
2742 instr.operand_count = 1;
2743 }
2744 return Ok(());
2745 }
2746 }
2747
2748 // Special handling for 0F 1C group (CLDEMOTE)
2749 if instr.opcode_map == OpcodeMap::Map0F && instr.opcode == 0x1C {
2750 if let Some(modrm_byte) = instr.modrm {
2751 let modrm = ModRm::from_byte(modrm_byte);
2752 if matches!(instr.mnemonic, Mnemonic::CLDEMOTE) {
2753 // CLDEMOTE has a single memory operand
2754 if !modrm.is_register() {
2755 let sib = instr.sib.map(Sib::from_byte);
2756 let mem = address::build_mem_operand(
2757 &modrm, sib.as_ref(), instr.displacement,
2758 instr.prefixes.segment(), rex_b, rex_x,
2759 );
2760 instr.operands[0] = Operand::memory(OperandSize::Size8, OperandAccess::Read, mem);
2761 instr.operand_count = 1;
2762 }
2763 return Ok(());
2764 }
2765 }
2766 }
2767
2768 // Special handling for 0F C7 group (CMPXCHG8B, CMPXCHG16B, RDPID)
2769 if instr.opcode_map == OpcodeMap::Map0F && instr.opcode == 0xC7 {
2770 if let Some(modrm_byte) = instr.modrm {
2771 let modrm = ModRm::from_byte(modrm_byte);
2772 let sib = instr.sib.map(Sib::from_byte);
2773
2774 match instr.mnemonic {
2775 Mnemonic::RDPID => {
2776 // RDPID reads TSC_AUX into the specified register
2777 if modrm.mod_field == 3 {
2778 instr.operands[0] = Operand::register(
2779 modrm.rm_register(rex_b, true),
2780 OperandSize::Size64,
2781 OperandAccess::Write,
2782 );
2783 instr.operand_count = 1;
2784 }
2785 return Ok(());
2786 }
2787 Mnemonic::CMPXCHG8B | Mnemonic::CMPXCHG16B => {
2788 // Memory operand
2789 if !modrm.is_register() {
2790 let mem = address::build_mem_operand(
2791 &modrm, sib.as_ref(), instr.displacement,
2792 instr.prefixes.segment(), rex_b, rex_x,
2793 );
2794 let size = if instr.mnemonic == Mnemonic::CMPXCHG16B {
2795 OperandSize::Size128
2796 } else {
2797 OperandSize::Size64
2798 };
2799 instr.operands[0] = Operand::memory(size, OperandAccess::ReadWrite, mem);
2800 instr.operand_count = 1;
2801 }
2802 return Ok(());
2803 }
2804 _ => {}
2805 }
2806 }
2807 }
2808
2809 // Special handling for XGETBV and XSETBV (0F 01 D0/D1)
2810 if instr.opcode_map == OpcodeMap::Map0F && instr.opcode == 0x01 {
2811 match instr.mnemonic {
2812 Mnemonic::XGETBV | Mnemonic::XSETBV => {
2813 // These use implicit registers (EDX:EAX for XGETBV, EDX:ECX for XSETBV)
2814 instr.operand_count = 0;
2815 return Ok(());
2816 }
2817 _ => {}
2818 }
2819 }
2820
2821 // Special handling for FF opcode group (INC/DEC/CALL/JMP/PUSH)
2822 // The actual instruction is determined by the ModRM reg field
2823 if instr.opcode == 0xFF {
2824 if let Some(modrm_byte) = instr.modrm {
2825 let modrm = ModRm::from_byte(modrm_byte);
2826 match modrm.reg {
2827 0 => {
2828 // FF /0 - INC r/m
2829 instr.mnemonic = Mnemonic::INC;
2830 }
2831 1 => {
2832 // FF /1 - DEC r/m
2833 instr.mnemonic = Mnemonic::DEC;
2834 }
2835 2 => {
2836 // FF /2 - CALL r/m
2837 instr.mnemonic = Mnemonic::CALL;
2838 }
2839 4 => {
2840 // FF /4 - JMP r/m
2841 instr.mnemonic = Mnemonic::JMP;
2842 }
2843 6 => {
2844 // FF /6 - PUSH r/m
2845 instr.mnemonic = Mnemonic::PUSH;
2846 }
2847 _ => {
2848 // Other values are invalid or different instructions
2849 }
2850 }
2851 }
2852 }
2853
2854 // Special handling for Group 1 opcodes (0x80-0x83): ADD/OR/ADC/SBB/AND/SUB/XOR/CMP r/m, imm
2855 // These instructions have the form: OP r/m, imm (destination is r/m, not reg)
2856 if (0x80..=0x83).contains(&instr.opcode) {
2857 let is_64bit = rex_w;
2858 let op_size = if is_64bit {
2859 OperandSize::Size64
2860 } else if instr.prefixes.has_operand_size() {
2861 OperandSize::Size16
2862 } else {
2863 OperandSize::Size32
2864 };
2865
2866 if let Some(modrm_byte) = instr.modrm {
2867 let modrm = ModRm::from_byte(modrm_byte);
2868
2869 // First operand: r/m (destination)
2870 if modrm.is_register() {
2871 instr.operands[0] = Operand::register(
2872 modrm.rm_register(rex_b, is_64bit),
2873 op_size,
2874 OperandAccess::ReadWrite,
2875 );
2876 } else {
2877 let sib = instr.sib.map(Sib::from_byte);
2878 let mem = address::build_mem_operand(
2879 &modrm,
2880 sib.as_ref(),
2881 instr.displacement,
2882 instr.prefixes.segment(),
2883 rex_b,
2884 rex_x,
2885 );
2886 instr.operands[0] = Operand::memory(op_size, OperandAccess::ReadWrite, mem);
2887 }
2888
2889 // Second operand: immediate
2890 // For 0x83, the immediate is sign-extended from 8 bits
2891 let imm_size = match instr.opcode {
2892 0x80 | 0x82 => OperandSize::Size8,
2893 0x81 => op_size,
2894 0x83 => OperandSize::Size8, // Sign-extended to op_size
2895 _ => OperandSize::Size8,
2896 };
2897 instr.operands[1] = Operand::immediate(instr.immediate, imm_size);
2898 instr.operand_count = 2;
2899 }
2900 return Ok(());
2901 }
2902
2903 // Special handling for Group 2 opcodes (Shift/Rotate: C0, C1, D0, D1, D2, D3)
2904 // These instructions have the form: OP r/m, imm8/1/CL
2905 if matches!(instr.opcode, 0xC0 | 0xC1 | 0xD0 | 0xD1 | 0xD2 | 0xD3) {
2906 // Determine operand size
2907 let is_8bit = instr.opcode == 0xC0 || instr.opcode == 0xD0 || instr.opcode == 0xD2;
2908 let op_size = if is_8bit {
2909 OperandSize::Size8
2910 } else if rex_w {
2911 OperandSize::Size64
2912 } else if instr.prefixes.has_operand_size() {
2913 OperandSize::Size16
2914 } else {
2915 OperandSize::Size32
2916 };
2917
2918 if let Some(modrm_byte) = instr.modrm {
2919 let modrm = ModRm::from_byte(modrm_byte);
2920
2921 // First operand: r/m (destination, read-write)
2922 if modrm.is_register() {
2923 instr.operands[0] = Operand::register(
2924 modrm.rm_register(rex_b, !is_8bit && rex_w),
2925 op_size,
2926 OperandAccess::ReadWrite,
2927 );
2928 } else {
2929 let sib = instr.sib.map(Sib::from_byte);
2930 let mem = address::build_mem_operand(
2931 &modrm,
2932 sib.as_ref(),
2933 instr.displacement,
2934 instr.prefixes.segment(),
2935 rex_b,
2936 rex_x,
2937 );
2938 instr.operands[0] = Operand::memory(op_size, OperandAccess::ReadWrite, mem);
2939 }
2940
2941 // Second operand: count (imm8, implicit 1, or CL)
2942 match instr.opcode {
2943 0xC0 | 0xC1 => {
2944 // imm8 count
2945 instr.operands[1] = Operand::immediate(instr.immediate, OperandSize::Size8);
2946 }
2947 0xD0 | 0xD1 => {
2948 // Implicit 1
2949 instr.operands[1] = Operand::immediate(1, OperandSize::Size8);
2950 }
2951 0xD2 | 0xD3 => {
2952 // CL register
2953 instr.operands[1] = Operand::register(
2954 Register::CL,
2955 OperandSize::Size8,
2956 OperandAccess::Read,
2957 );
2958 }
2959 _ => {}
2960 }
2961 instr.operand_count = 2;
2962 }
2963 return Ok(());
2964 }
2965
2966 // Special handling for Group 3 opcodes (0xF6, 0xF7): TEST/NOT/NEG/MUL/IMUL/DIV/IDIV r/m
2967 if instr.opcode == 0xF6 || instr.opcode == 0xF7 {
2968 let is_8bit = instr.opcode == 0xF6;
2969 let op_size = if is_8bit {
2970 OperandSize::Size8
2971 } else if rex_w {
2972 OperandSize::Size64
2973 } else if instr.prefixes.has_operand_size() {
2974 OperandSize::Size16
2975 } else {
2976 OperandSize::Size32
2977 };
2978
2979 if let Some(modrm_byte) = instr.modrm {
2980 let modrm = ModRm::from_byte(modrm_byte);
2981
2982 // First operand: r/m
2983 if modrm.is_register() {
2984 instr.operands[0] = Operand::register(
2985 modrm.rm_register(rex_b, !is_8bit && rex_w),
2986 op_size,
2987 if modrm.reg == 0 { OperandAccess::Read } else { OperandAccess::ReadWrite }, // TEST is read-only
2988 );
2989 } else {
2990 let sib = instr.sib.map(Sib::from_byte);
2991 let mem = address::build_mem_operand(
2992 &modrm,
2993 sib.as_ref(),
2994 instr.displacement,
2995 instr.prefixes.segment(),
2996 rex_b,
2997 rex_x,
2998 );
2999 instr.operands[0] = Operand::memory(
3000 op_size,
3001 if modrm.reg == 0 { OperandAccess::Read } else { OperandAccess::ReadWrite },
3002 mem,
3003 );
3004 }
3005
3006 // Second operand: immediate (only for TEST)
3007 if modrm.reg == 0 {
3008 let imm_size = if is_8bit { OperandSize::Size8 } else { op_size };
3009 instr.operands[1] = Operand::immediate(instr.immediate, imm_size);
3010 instr.operand_count = 2;
3011 } else {
3012 instr.operand_count = 1;
3013 }
3014 }
3015 return Ok(());
3016 }
3017
3018 // Special handling for LOOP instructions (0xE0, 0xE1, 0xE2)
3019 if matches!(instr.opcode, 0xE0 | 0xE1 | 0xE2) {
3020 // LOOP instructions have a single rel8 operand
3021 instr.operands[0] = Operand::immediate(instr.immediate, OperandSize::Size8);
3022 instr.operand_count = 1;
3023 return Ok(());
3024 }
3025
3026 // Build operands based on instruction type
3027 match instr.mnemonic {
3028 Mnemonic::MOV => {
3029 if let Some(modrm_byte) = instr.modrm {
3030 let modrm = ModRm::from_byte(modrm_byte);
3031 let is_64bit = rex_w;
3032
3033 if modrm.is_register() {
3034 // Register-to-register MOV
3035 let reg_size = if is_64bit {
3036 OperandSize::Size64
3037 } else {
3038 OperandSize::Size32
3039 };
3040 instr.operands[0] = Operand::register(
3041 modrm.rm_register(rex_b, is_64bit),
3042 reg_size,
3043 OperandAccess::Write,
3044 );
3045 instr.operands[1] = Operand::register(
3046 modrm.reg_register(rex_r, is_64bit),
3047 reg_size,
3048 OperandAccess::Read,
3049 );
3050 instr.operand_count = 2;
3051 } else {
3052 // Memory operand
3053 let sib = instr.sib.map(Sib::from_byte);
3054 let mem = address::build_mem_operand(
3055 &modrm,
3056 sib.as_ref(),
3057 instr.displacement,
3058 instr.prefixes.segment(),
3059 rex_b,
3060 rex_x,
3061 );
3062 let op_size = if is_64bit {
3063 OperandSize::Size64
3064 } else {
3065 OperandSize::Size32
3066 };
3067 instr.operands[0] = Operand::memory(op_size, OperandAccess::Write, mem);
3068 instr.operands[1] = Operand::register(
3069 modrm.reg_register(rex_r, is_64bit),
3070 op_size,
3071 OperandAccess::Read,
3072 );
3073 instr.operand_count = 2;
3074 }
3075 } else if instr.attributes.contains(InstructionAttributes::HAS_IMM) {
3076 // MOV r, imm (opcode 0xB8-0xBF)
3077 // In 64-bit mode: 32-bit operand size by default, 64-bit with REX.W
3078 let reg_idx = (instr.opcode - 0xB8) & 0x07;
3079 let is_64bit = rex_w;
3080 let op_size = if is_64bit {
3081 OperandSize::Size64
3082 } else {
3083 OperandSize::Size32
3084 };
3085 let reg = if is_64bit {
3086 modrm::get_register_64(reg_idx, rex_b)
3087 } else {
3088 modrm::get_register_32(reg_idx, rex_b)
3089 };
3090 instr.operands[0] = Operand::register(reg, op_size, OperandAccess::Write);
3091 instr.operands[1] = Operand::immediate(instr.immediate, op_size);
3092 instr.operand_count = 2;
3093 }
3094 }
3095 Mnemonic::PUSH | Mnemonic::POP => {
3096 let stack_width = instr.stack_width;
3097
3098 // Check if this is PUSH with immediate (0x6A or 0x68)
3099 if instr.mnemonic == Mnemonic::PUSH && (instr.opcode == 0x6A || instr.opcode == 0x68) {
3100 // PUSH imm8 or PUSH imm32/64
3101 let imm_size = if instr.opcode == 0x6A {
3102 OperandSize::Size8 // Sign-extended to 64-bit
3103 } else {
3104 OperandSize::Size32 // Sign-extended to 64-bit
3105 };
3106 instr.operands[0] = Operand::immediate(instr.immediate, imm_size);
3107 instr.stack_pointer_info = instruction::StackPointerInfo::push(stack_width);
3108 instr.operand_count = 1;
3109 } else {
3110 // PUSH/POP register (50-5F)
3111 let reg_idx = (instr.opcode - 0x50) & 0x07;
3112 let reg = modrm::get_register_64(reg_idx, rex_b);
3113
3114 if instr.mnemonic == Mnemonic::PUSH {
3115 instr.operands[0] =
3116 Operand::register(reg, OperandSize::Size64, OperandAccess::Read);
3117 instr.stack_pointer_info = instruction::StackPointerInfo::push(stack_width);
3118 } else {
3119 instr.operands[0] =
3120 Operand::register(reg, OperandSize::Size64, OperandAccess::Write);
3121 instr.stack_pointer_info = instruction::StackPointerInfo::pop(stack_width);
3122 }
3123 instr.operand_count = 1;
3124 }
3125 }
3126 Mnemonic::INC | Mnemonic::DEC => {
3127 // FF /0 (INC) and FF /1 (DEC) - single r/m operand
3128 if let Some(modrm_byte) = instr.modrm {
3129 let modrm = ModRm::from_byte(modrm_byte);
3130 // Determine operand size: 32-bit by default, 64-bit with REX.W
3131 let is_64bit = rex_w;
3132 let op_size = if is_64bit {
3133 OperandSize::Size64
3134 } else {
3135 OperandSize::Size32
3136 };
3137
3138 if modrm.is_register() {
3139 // Register operand (from rm field)
3140 instr.operands[0] = Operand::register(
3141 modrm.rm_register(rex_b, is_64bit),
3142 op_size,
3143 OperandAccess::ReadWrite,
3144 );
3145 } else {
3146 // Memory operand
3147 let sib = instr.sib.map(Sib::from_byte);
3148 let mem = address::build_mem_operand(
3149 &modrm,
3150 sib.as_ref(),
3151 instr.displacement,
3152 instr.prefixes.segment(),
3153 rex_b,
3154 rex_x,
3155 );
3156 instr.operands[0] = Operand::memory(op_size, OperandAccess::ReadWrite, mem);
3157 }
3158 instr.operand_count = 1;
3159 }
3160 }
3161 Mnemonic::MOVSXD => {
3162 // MOVSXD r64, r/m32 (63 /r) - sign-extend 32-bit to 64-bit
3163 if let Some(modrm_byte) = instr.modrm {
3164 let modrm = ModRm::from_byte(modrm_byte);
3165
3166 // Destination is always 64-bit register (reg field)
3167 instr.operands[0] = Operand::register(
3168 modrm.reg_register(rex_r, true), // is_64bit = true
3169 OperandSize::Size64,
3170 OperandAccess::Write,
3171 );
3172
3173 // Source is 32-bit register or memory (rm field)
3174 if modrm.is_register() {
3175 instr.operands[1] = Operand::register(
3176 modrm.rm_register(rex_b, false), // is_64bit = false for source
3177 OperandSize::Size32,
3178 OperandAccess::Read,
3179 );
3180 } else {
3181 let sib = instr.sib.map(Sib::from_byte);
3182 let mem = address::build_mem_operand(
3183 &modrm,
3184 sib.as_ref(),
3185 instr.displacement,
3186 instr.prefixes.segment(),
3187 rex_b,
3188 rex_x,
3189 );
3190 instr.operands[1] = Operand::memory(OperandSize::Size32, OperandAccess::Read, mem);
3191 }
3192 instr.operand_count = 2;
3193 }
3194 }
3195 Mnemonic::XCHG => {
3196 // XCHG has two forms:
3197 // 1. XCHG r/m, r (86/87 with ModRM)
3198 // 2. XCHG rAX, r64 (90-97, 1-byte form)
3199 if instr.opcode == 0x86 || instr.opcode == 0x87 {
3200 // XCHG r/m, r with ModRM
3201 if let Some(modrm_byte) = instr.modrm {
3202 let modrm = ModRm::from_byte(modrm_byte);
3203 let is_8bit = instr.opcode == 0x86;
3204 let is_64bit = rex_w && !is_8bit;
3205 let op_size = if is_8bit {
3206 OperandSize::Size8
3207 } else if is_64bit {
3208 OperandSize::Size64
3209 } else if instr.prefixes.has_operand_size() {
3210 OperandSize::Size16
3211 } else {
3212 OperandSize::Size32
3213 };
3214
3215 // First operand: r/m
3216 if modrm.is_register() {
3217 instr.operands[0] = Operand::register(
3218 modrm.rm_register(rex_b, is_64bit),
3219 op_size,
3220 OperandAccess::ReadWrite,
3221 );
3222 } else {
3223 let sib = instr.sib.map(Sib::from_byte);
3224 let mem = address::build_mem_operand(
3225 &modrm,
3226 sib.as_ref(),
3227 instr.displacement,
3228 instr.prefixes.segment(),
3229 rex_b,
3230 rex_x,
3231 );
3232 instr.operands[0] = Operand::memory(op_size, OperandAccess::ReadWrite, mem);
3233 }
3234
3235 // Second operand: reg
3236 instr.operands[1] = Operand::register(
3237 modrm.reg_register(rex_r, is_64bit),
3238 op_size,
3239 OperandAccess::ReadWrite,
3240 );
3241 instr.operand_count = 2;
3242 }
3243 } else if (0x90..=0x97).contains(&instr.opcode) {
3244 // XCHG rAX, r64 (90+rd)
3245 // 0x90 without REX.B is NOP (handled by Mnemonic::NOP)
3246 // 0x90 with REX.B is XCHG RAX, R8
3247 let reg_idx = (instr.opcode - 0x90) & 0x07;
3248 let reg = modrm::get_register_64(reg_idx, rex_b);
3249
3250 // First operand: RAX (always 64-bit in 64-bit mode)
3251 instr.operands[0] = Operand::register(
3252 Register::RAX,
3253 OperandSize::Size64,
3254 OperandAccess::ReadWrite,
3255 );
3256 // Second operand: reg
3257 instr.operands[1] = Operand::register(reg, OperandSize::Size64, OperandAccess::ReadWrite);
3258 instr.operand_count = 2;
3259 }
3260 }
3261 Mnemonic::NOP => {
3262 // Check if this is a multi-byte NOP (0F 1F with ModRM)
3263 if let Some(modrm_byte) = instr.modrm {
3264 // Multi-byte NOP has a memory operand
3265 let modrm = ModRm::from_byte(modrm_byte);
3266
3267 if modrm.is_register() {
3268 // Register form: NOP r/m (rare)
3269 let op_size = if rex_w {
3270 OperandSize::Size64
3271 } else {
3272 OperandSize::Size32
3273 };
3274 instr.operands[0] = Operand::register(
3275 modrm.rm_register_64(rex_b),
3276 op_size,
3277 OperandAccess::Read,
3278 );
3279 } else {
3280 // Memory form: NOP m
3281 let sib = instr.sib.map(Sib::from_byte);
3282 let mem = address::build_mem_operand(
3283 &modrm,
3284 sib.as_ref(),
3285 instr.displacement,
3286 instr.prefixes.segment(),
3287 rex_b,
3288 rex_x,
3289 );
3290 // Default to 32-bit for multi-byte NOP
3291 let op_size = if rex_w {
3292 OperandSize::Size64
3293 } else {
3294 OperandSize::Size32
3295 };
3296 instr.operands[0] = Operand::memory(op_size, OperandAccess::Read, mem);
3297 }
3298 instr.operand_count = 1;
3299 } else {
3300 // Single-byte NOP (0x90) - no operands
3301 instr.operand_count = 0;
3302 }
3303 }
3304 Mnemonic::RET => {
3305 // No explicit operands
3306 instr.operand_count = 0;
3307 // Set stack pointer info for RET
3308 let stack_width = instr.stack_width;
3309 instr.stack_pointer_info = instruction::StackPointerInfo::ret(stack_width);
3310 }
3311 Mnemonic::INT3 => {
3312 instr.operand_count = 0;
3313 }
3314 Mnemonic::JMP | Mnemonic::CALL => {
3315 // Relative offset
3316 let offset_val = immediate::sign_extend(instr.immediate, instr.imm_size * 8);
3317 instr.operands[0] = Operand::relative_offset(offset_val, OperandSize::Size32);
3318 instr.operand_count = 1;
3319
3320 // Set stack pointer info for CALL
3321 if instr.mnemonic == Mnemonic::CALL {
3322 let stack_width = instr.stack_width;
3323 instr.stack_pointer_info = instruction::StackPointerInfo::call(stack_width);
3324 }
3325 }
3326 // Conditional jumps (Jcc) - 0x70-0x7F (short) and 0x0F 0x80-0x8F (near)
3327 Mnemonic::JO
3328 | Mnemonic::JNO
3329 | Mnemonic::JB
3330 | Mnemonic::JNB
3331 | Mnemonic::JZ
3332 | Mnemonic::JNZ
3333 | Mnemonic::JBE
3334 | Mnemonic::JA
3335 | Mnemonic::JS
3336 | Mnemonic::JNS
3337 | Mnemonic::JP
3338 | Mnemonic::JNP
3339 | Mnemonic::JL
3340 | Mnemonic::JGE
3341 | Mnemonic::JLE
3342 | Mnemonic::JG
3343 | Mnemonic::JE
3344 | Mnemonic::JNE
3345 | Mnemonic::JC
3346 | Mnemonic::JNC => {
3347 // Relative offset for conditional jumps
3348 let offset_val = immediate::sign_extend(instr.immediate, instr.imm_size * 8);
3349 instr.operands[0] = Operand::relative_offset(offset_val, OperandSize::Size32);
3350 instr.operand_count = 1;
3351 }
3352 // CMOVcc - Conditional Move (0F 40-4F)
3353 Mnemonic::CMOVO
3354 | Mnemonic::CMOVNO
3355 | Mnemonic::CMOVB
3356 | Mnemonic::CMOVAE
3357 | Mnemonic::CMOVE
3358 | Mnemonic::CMOVNE
3359 | Mnemonic::CMOVBE
3360 | Mnemonic::CMOVA
3361 | Mnemonic::CMOVS
3362 | Mnemonic::CMOVNS
3363 | Mnemonic::CMOVP
3364 | Mnemonic::CMOVNP
3365 | Mnemonic::CMOVL
3366 | Mnemonic::CMOVGE
3367 | Mnemonic::CMOVLE
3368 | Mnemonic::CMOVG
3369 | Mnemonic::CMOVC
3370 | Mnemonic::CMOVNC
3371 | Mnemonic::CMOVZ
3372 | Mnemonic::CMOVNZ => {
3373 // CMOVcc r, r/m - conditional move
3374 if let Some(modrm_byte) = instr.modrm {
3375 let modrm = ModRm::from_byte(modrm_byte);
3376 let is_64bit = rex_w || self.is_64_bit; // Default to 64-bit in 64-bit mode
3377 let op_size = if is_64bit {
3378 OperandSize::Size64
3379 } else if instr.prefixes.has_operand_size() {
3380 OperandSize::Size16
3381 } else {
3382 OperandSize::Size32
3383 };
3384
3385 // Destination: reg (conditional write)
3386 instr.operands[0] = Operand::register(
3387 modrm.reg_register(rex_r, is_64bit),
3388 op_size,
3389 OperandAccess::WriteConditional,
3390 );
3391
3392 // Source: r/m
3393 if modrm.is_register() {
3394 instr.operands[1] = Operand::register(
3395 modrm.rm_register(rex_b, is_64bit),
3396 op_size,
3397 OperandAccess::Read,
3398 );
3399 } else {
3400 let sib = instr.sib.map(Sib::from_byte);
3401 let mem = address::build_mem_operand(
3402 &modrm,
3403 sib.as_ref(),
3404 instr.displacement,
3405 instr.prefixes.segment(),
3406 rex_b,
3407 rex_x,
3408 );
3409 instr.operands[1] = Operand::memory(op_size, OperandAccess::Read, mem);
3410 }
3411 instr.operand_count = 2;
3412 }
3413 }
3414 // SETcc - Set Byte on Condition (0F 90-9F)
3415 Mnemonic::SETO
3416 | Mnemonic::SETNO
3417 | Mnemonic::SETB
3418 | Mnemonic::SETAE
3419 | Mnemonic::SETE
3420 | Mnemonic::SETNE
3421 | Mnemonic::SETBE
3422 | Mnemonic::SETNBE // SETA is alias for SETNBE
3423 | Mnemonic::SETS
3424 | Mnemonic::SETNS
3425 | Mnemonic::SETP
3426 | Mnemonic::SETNP
3427 | Mnemonic::SETL
3428 | Mnemonic::SETGE
3429 | Mnemonic::SETLE
3430 | Mnemonic::SETG
3431 | Mnemonic::SETC
3432 | Mnemonic::SETNC
3433 | Mnemonic::SETZ
3434 | Mnemonic::SETNZ => {
3435 // SETcc r/m8 - set byte on condition
3436 if let Some(modrm_byte) = instr.modrm {
3437 let modrm = ModRm::from_byte(modrm_byte);
3438
3439 // Destination: r/m8 (conditional write)
3440 if modrm.is_register() {
3441 instr.operands[0] = Operand::register(
3442 modrm::get_register_8(modrm.rm, rex_b),
3443 OperandSize::Size8,
3444 OperandAccess::WriteConditional,
3445 );
3446 } else {
3447 let sib = instr.sib.map(Sib::from_byte);
3448 let mem = address::build_mem_operand(
3449 &modrm,
3450 sib.as_ref(),
3451 instr.displacement,
3452 instr.prefixes.segment(),
3453 rex_b,
3454 rex_x,
3455 );
3456 instr.operands[0] = Operand::memory(OperandSize::Size8, OperandAccess::WriteConditional, mem);
3457 }
3458 instr.operand_count = 1;
3459 }
3460 }
3461 // SSE/SSE2 Packed and Scalar Instructions
3462 Mnemonic::ADDPS
3463 | Mnemonic::ADDPD
3464 | Mnemonic::ADDSS
3465 | Mnemonic::ADDSD
3466 | Mnemonic::SUBPS
3467 | Mnemonic::SUBPD
3468 | Mnemonic::SUBSS
3469 | Mnemonic::SUBSD
3470 | Mnemonic::MULPS
3471 | Mnemonic::MULPD
3472 | Mnemonic::MULSS
3473 | Mnemonic::MULSD
3474 | Mnemonic::DIVPS
3475 | Mnemonic::DIVPD
3476 | Mnemonic::DIVSS
3477 | Mnemonic::DIVSD
3478 | Mnemonic::ANDPS
3479 | Mnemonic::ANDPD
3480 | Mnemonic::ANDNPS
3481 | Mnemonic::ANDNPD
3482 | Mnemonic::ORPS
3483 | Mnemonic::ORPD
3484 | Mnemonic::XORPS
3485 | Mnemonic::XORPD
3486 | Mnemonic::MOVAPS
3487 | Mnemonic::MOVAPD
3488 | Mnemonic::MOVUPS
3489 | Mnemonic::MOVUPD
3490 | Mnemonic::UNPCKLPS
3491 | Mnemonic::UNPCKLPD
3492 | Mnemonic::UNPCKHPS
3493 | Mnemonic::UNPCKHPD
3494 | Mnemonic::COMISS
3495 | Mnemonic::COMISD
3496 | Mnemonic::UCOMISS
3497 | Mnemonic::UCOMISD
3498 | Mnemonic::MOVSS => {
3499 // SSE arithmetic/logical instructions: xmm1, xmm2/m128 or xmm1, xmm2/m32
3500 if let Some(modrm_byte) = instr.modrm {
3501 let modrm = ModRm::from_byte(modrm_byte);
3502
3503 // Determine operand size based on instruction
3504 let op_size = match instr.mnemonic {
3505 // Scalar single-precision (32-bit)
3506 Mnemonic::ADDSS
3507 | Mnemonic::SUBSS
3508 | Mnemonic::MULSS
3509 | Mnemonic::DIVSS
3510 | Mnemonic::MOVSS
3511 | Mnemonic::COMISS
3512 | Mnemonic::UCOMISS => OperandSize::Size32,
3513 // Scalar double-precision (64-bit)
3514 Mnemonic::ADDSD
3515 | Mnemonic::SUBSD
3516 | Mnemonic::MULSD
3517 | Mnemonic::DIVSD
3518 | Mnemonic::COMISD
3519 | Mnemonic::UCOMISD => OperandSize::Size64,
3520 // Packed (128-bit)
3521 _ => OperandSize::Size128,
3522 };
3523
3524 // Destination is always xmm register (reg field)
3525 instr.operands[0] = Operand::register(
3526 modrm::get_xmm_register(modrm.reg, rex_r),
3527 op_size,
3528 OperandAccess::ReadWrite,
3529 );
3530
3531 // Source is xmm register or memory (r/m field)
3532 if modrm.is_register() {
3533 instr.operands[1] = Operand::register(
3534 modrm::get_xmm_register(modrm.rm, rex_b),
3535 op_size,
3536 OperandAccess::Read,
3537 );
3538 } else {
3539 let sib = instr.sib.map(Sib::from_byte);
3540 let mem = address::build_mem_operand(
3541 &modrm,
3542 sib.as_ref(),
3543 instr.displacement,
3544 instr.prefixes.segment(),
3545 rex_b,
3546 rex_x,
3547 );
3548 instr.operands[1] = Operand::memory(op_size, OperandAccess::Read, mem);
3549 }
3550 instr.operand_count = 2;
3551 }
3552 }
3553 // x87 FPU instructions with no operands (these have ModRM but no explicit operands)
3554 Mnemonic::FLD1
3555 | Mnemonic::FLDZ
3556 | Mnemonic::FLDPI
3557 | Mnemonic::FLDL2E
3558 | Mnemonic::FLDL2T
3559 | Mnemonic::FLDLG2
3560 | Mnemonic::FLDLN2
3561 | Mnemonic::FCHS
3562 | Mnemonic::FABS
3563 | Mnemonic::FTST
3564 | Mnemonic::FXAM
3565 | Mnemonic::F2XM1
3566 | Mnemonic::FYL2X
3567 | Mnemonic::FPTAN
3568 | Mnemonic::FPATAN
3569 | Mnemonic::FXTRACT
3570 | Mnemonic::FPREM1
3571 | Mnemonic::FDECSTP
3572 | Mnemonic::FINCSTP
3573 | Mnemonic::FPREM
3574 | Mnemonic::FYL2XP1
3575 | Mnemonic::FSQRT
3576 | Mnemonic::FSINCOS
3577 | Mnemonic::FRNDINT
3578 | Mnemonic::FSCALE
3579 | Mnemonic::FSIN
3580 | Mnemonic::FCOS
3581 // x87 FPU arithmetic (register form uses implicit ST(i) operands)
3582 | Mnemonic::FADD
3583 | Mnemonic::FADDP
3584 | Mnemonic::FSUB
3585 | Mnemonic::FSUBP
3586 | Mnemonic::FSUBR
3587 | Mnemonic::FSUBRP
3588 | Mnemonic::FMUL
3589 | Mnemonic::FMULP
3590 | Mnemonic::FDIV
3591 | Mnemonic::FDIVP
3592 | Mnemonic::FDIVR
3593 | Mnemonic::FDIVRP
3594 | Mnemonic::FCOM
3595 | Mnemonic::FCOMP
3596 | Mnemonic::FUCOM
3597 | Mnemonic::FUCOMP
3598 | Mnemonic::FUCOMPP
3599 | Mnemonic::FCOMI
3600 | Mnemonic::FUCOMI
3601 | Mnemonic::FXCH
3602 | Mnemonic::FFREE
3603 | Mnemonic::FCMOVB
3604 | Mnemonic::FCMOVE
3605 | Mnemonic::FCMOVBE
3606 | Mnemonic::FCMOVU
3607 | Mnemonic::FCMOVNB
3608 | Mnemonic::FCMOVNE
3609 | Mnemonic::FCMOVNBE
3610 | Mnemonic::FCMOVNU
3611 | Mnemonic::FNSTSW => {
3612 // x87 instructions with implicit operands (no explicit operands)
3613 instr.operand_count = 0;
3614 }
3615 // Intel VT-x and AMD SVM instructions with no explicit operands
3616 Mnemonic::VMCALL
3617 | Mnemonic::VMLAUNCH
3618 | Mnemonic::VMRESUME
3619 | Mnemonic::VMXOFF
3620 | Mnemonic::VMREAD
3621 | Mnemonic::VMWRITE
3622 | Mnemonic::VMRUN
3623 | Mnemonic::VMMCALL
3624 | Mnemonic::VMLOAD
3625 | Mnemonic::VMSAVE
3626 | Mnemonic::CLGI
3627 | Mnemonic::STGI
3628 | Mnemonic::ENCLS
3629 | Mnemonic::ENCLU
3630 | Mnemonic::ENCLV
3631 | Mnemonic::SERIALIZE
3632 | Mnemonic::XGETBV
3633 | Mnemonic::XSETBV
3634 | Mnemonic::CLAC
3635 | Mnemonic::STAC
3636 | Mnemonic::SWAPGS
3637 | Mnemonic::RDTSCP => {
3638 // These instructions use implicit registers and have no explicit operands
3639 instr.operand_count = 0;
3640 }
3641 // x87 FPU instructions with ST(i) operand (register form)
3642 Mnemonic::FLD | Mnemonic::FST | Mnemonic::FSTP => {
3643 // These have explicit ST(i) operand in register form
3644 // The operand_count is already set during decode
3645 }
3646 _ => {
3647 // Generic operand building for instructions with ModRM
3648 if let Some(modrm_byte) = instr.modrm {
3649 let modrm = ModRm::from_byte(modrm_byte);
3650 let is_64bit = rex_w;
3651 let op_size = if is_64bit {
3652 OperandSize::Size64
3653 } else {
3654 OperandSize::Size32
3655 };
3656
3657 if modrm.is_register() {
3658 instr.operands[0] = Operand::register(
3659 modrm.reg_register(rex_r, is_64bit),
3660 op_size,
3661 OperandAccess::ReadWrite,
3662 );
3663 instr.operands[1] = Operand::register(
3664 modrm.rm_register(rex_b, is_64bit),
3665 op_size,
3666 OperandAccess::Read,
3667 );
3668 } else {
3669 let sib = instr.sib.map(Sib::from_byte);
3670 let mem = address::build_mem_operand(
3671 &modrm,
3672 sib.as_ref(),
3673 instr.displacement,
3674 instr.prefixes.segment(),
3675 rex_b,
3676 rex_x,
3677 );
3678 instr.operands[0] = Operand::memory(op_size, OperandAccess::ReadWrite, mem);
3679 instr.operands[1] = Operand::register(
3680 modrm.reg_register(rex_r, is_64bit),
3681 op_size,
3682 OperandAccess::Read,
3683 );
3684 }
3685 instr.operand_count = 2;
3686 }
3687 }
3688 }
3689
3690 Ok(())
3691 }
3692
3693 /// Calculate effective operand and address sizes
3694 fn calculate_effective_sizes(&self, instr: &mut DecodedInstruction) {
3695 // EVEX and VEX instructions have their operand size set during opcode parsing
3696 if instr.prefixes.has_evex() || instr.prefixes.has_vex() {
3697 return;
3698 }
3699
3700 let has_osz = instr.prefixes.has_operand_size();
3701 let has_asz = instr.prefixes.has_address_size();
3702 let has_rex_w = instr.prefixes.effective_w();
3703
3704 instr.operand_size = address::effective_operand_size(
3705 has_osz,
3706 has_rex_w,
3707 self.is_64_bit,
3708 self.default_operand_size,
3709 );
3710
3711 instr.address_size = address::effective_address_size(has_asz, self.is_64_bit);
3712 }
3713
3714 /// Parse VEX-encoded opcode
3715 fn parse_vex_opcode(
3716 &self,
3717 bytes: &[u8],
3718 mut offset: usize,
3719 instr: &mut DecodedInstruction,
3720 ) -> Result<usize> {
3721 if offset >= bytes.len() {
3722 return Err(Error::InsufficientBytes);
3723 }
3724
3725 let vex = &instr.prefixes.vex;
3726 let opcode = bytes[offset];
3727 instr.opcode = opcode;
3728 offset += 1;
3729
3730 // Look up VEX instruction
3731 if let Some(vex_def) =
3732 crate::data::vex_instructions::lookup_vex_instruction(vex.map, opcode, vex.pp, vex.w)
3733 {
3734 instr.mnemonic = vex_def.mnemonic;
3735 instr.category = vex_def.category;
3736 instr.isa_set = vex_def.isa_set;
3737 instr.attributes =
3738 vex_def.attributes | InstructionAttributes::IS_VEX | InstructionAttributes::IS_SIMD;
3739 instr.operand_count = vex_def.operand_count;
3740
3741 // Set operand size based on VEX.L (vector length)
3742 instr.operand_size = if vex.l { 256 } else { 128 };
3743 } else {
3744 // Unknown VEX instruction
3745 instr.mnemonic = Mnemonic::Invalid;
3746 instr.category = InstructionCategory::Invalid;
3747 instr.isa_set = IsaSet::Avx;
3748 }
3749
3750 Ok(offset)
3751 }
3752
3753 /// Build operands for VEX-encoded instructions
3754 fn build_vex_operands(&self, instr: &mut DecodedInstruction) -> Result<()> {
3755 let vex = &instr.prefixes.vex;
3756 let vex_l = vex.l; // 256-bit (true) or 128-bit (false)
3757
3758 // Get ModRM if present
3759 let modrm = instr.modrm.map(ModRm::from_byte);
3760 let rex_r = instr.prefixes.effective_r();
3761 let rex_b = instr.prefixes.effective_b();
3762 let rex_x = instr.prefixes.effective_x();
3763
3764 // Determine operand size based on VEX.L
3765 let op_size = if vex_l {
3766 OperandSize::Size256
3767 } else {
3768 OperandSize::Size128
3769 };
3770
3771 match instr.mnemonic {
3772 // 3-operand arithmetic: VADDPS, VADDPD, VSUBPS, VSUBPD, VMULPS, VMULPD, VDIVPS, VDIVPD, VXORPS, VXORPD, etc.
3773 Mnemonic::VADDPS
3774 | Mnemonic::VADDPD
3775 | Mnemonic::VSUBPS
3776 | Mnemonic::VSUBPD
3777 | Mnemonic::VMULPS
3778 | Mnemonic::VMULPD
3779 | Mnemonic::VDIVPS
3780 | Mnemonic::VDIVPD
3781 | Mnemonic::VADDSS
3782 | Mnemonic::VADDSD
3783 | Mnemonic::VSUBSS
3784 | Mnemonic::VSUBSD
3785 | Mnemonic::VMULSS
3786 | Mnemonic::VMULSD
3787 | Mnemonic::VDIVSS
3788 | Mnemonic::VDIVSD
3789 | Mnemonic::VXORPS
3790 | Mnemonic::VXORPD
3791 | Mnemonic::VANDPS
3792 | Mnemonic::VANDPD
3793 | Mnemonic::VORPS
3794 | Mnemonic::VORPD
3795 | Mnemonic::VUNPCKLPS
3796 | Mnemonic::VUNPCKHPS => {
3797 if let Some(modrm) = modrm {
3798 // VEX.vvvv is the first source operand (inverted encoding)
3799 let vvvv_reg = get_vector_register(vex.vvvv, vex_l, false);
3800 // ModRM.reg is the destination
3801 let dest_reg = get_vector_register(modrm.reg, vex_l, rex_r);
3802
3803 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
3804 instr.operands[1] = Operand::register(vvvv_reg, op_size, OperandAccess::Read);
3805
3806 if modrm.is_register() {
3807 let src_reg = get_vector_register(modrm.rm, vex_l, rex_b);
3808 instr.operands[2] =
3809 Operand::register(src_reg, op_size, OperandAccess::Read);
3810 } else {
3811 // Memory operand
3812 let sib = instr.sib.map(Sib::from_byte);
3813 let mem = address::build_mem_operand(
3814 &modrm,
3815 sib.as_ref(),
3816 instr.displacement,
3817 instr.prefixes.segment(),
3818 rex_b,
3819 rex_x,
3820 );
3821 instr.operands[2] = Operand::memory(op_size, OperandAccess::Read, mem);
3822 }
3823 instr.operand_count = 3;
3824 }
3825 }
3826
3827 // 2-operand moves: VMOVAPS, VMOVUPS, VMOVAPD, VMOVUPD, VMOVSS, VMOVSD
3828 Mnemonic::VMOVAPS
3829 | Mnemonic::VMOVUPS
3830 | Mnemonic::VMOVAPD
3831 | Mnemonic::VMOVUPD
3832 | Mnemonic::VMOVSS
3833 | Mnemonic::VMOVSD => {
3834 if let Some(modrm) = modrm {
3835 // For load form (opcode 0x10, 0x28): dest = reg, src = r/m
3836 // For store form (opcode 0x11, 0x29): dest = r/m, src = reg
3837 let is_load = instr.opcode == 0x10 || instr.opcode == 0x28;
3838
3839 if is_load {
3840 let dest_reg = get_vector_register(modrm.reg, vex_l, rex_r);
3841 instr.operands[0] =
3842 Operand::register(dest_reg, op_size, OperandAccess::Write);
3843
3844 if modrm.is_register() {
3845 let src_reg = get_vector_register(modrm.rm, vex_l, rex_b);
3846 instr.operands[1] =
3847 Operand::register(src_reg, op_size, OperandAccess::Read);
3848 } else {
3849 let sib = instr.sib.map(Sib::from_byte);
3850 let mem = address::build_mem_operand(
3851 &modrm,
3852 sib.as_ref(),
3853 instr.displacement,
3854 instr.prefixes.segment(),
3855 rex_b,
3856 rex_x,
3857 );
3858 instr.operands[1] = Operand::memory(op_size, OperandAccess::Read, mem);
3859 }
3860 } else {
3861 // Store form
3862 let src_reg = get_vector_register(modrm.reg, vex_l, rex_r);
3863 instr.operands[0] =
3864 Operand::register(src_reg, op_size, OperandAccess::Read);
3865
3866 if modrm.is_register() {
3867 let dest_reg = get_vector_register(modrm.rm, vex_l, rex_b);
3868 instr.operands[1] =
3869 Operand::register(dest_reg, op_size, OperandAccess::Write);
3870 } else {
3871 let sib = instr.sib.map(Sib::from_byte);
3872 let mem = address::build_mem_operand(
3873 &modrm,
3874 sib.as_ref(),
3875 instr.displacement,
3876 instr.prefixes.segment(),
3877 rex_b,
3878 rex_x,
3879 );
3880 instr.operands[1] = Operand::memory(op_size, OperandAccess::Write, mem);
3881 }
3882 }
3883 instr.operand_count = 2;
3884 }
3885 }
3886
3887 // 4-operand with immediate: VSHUFPS, VSHUFPD
3888 Mnemonic::VSHUFPS | Mnemonic::VSHUFPD => {
3889 if let Some(modrm) = modrm {
3890 let vvvv_reg = get_vector_register(vex.vvvv, vex_l, false);
3891 let dest_reg = get_vector_register(modrm.reg, vex_l, rex_r);
3892
3893 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
3894 instr.operands[1] = Operand::register(vvvv_reg, op_size, OperandAccess::Read);
3895
3896 if modrm.is_register() {
3897 let src_reg = get_vector_register(modrm.rm, vex_l, rex_b);
3898 instr.operands[2] =
3899 Operand::register(src_reg, op_size, OperandAccess::Read);
3900 } else {
3901 let sib = instr.sib.map(Sib::from_byte);
3902 let mem = address::build_mem_operand(
3903 &modrm,
3904 sib.as_ref(),
3905 instr.displacement,
3906 instr.prefixes.segment(),
3907 rex_b,
3908 rex_x,
3909 );
3910 instr.operands[2] = Operand::memory(op_size, OperandAccess::Read, mem);
3911 }
3912
3913 // Immediate operand (shuffle control)
3914 instr.operands[3] = Operand::immediate(instr.immediate, OperandSize::Size8);
3915 instr.operand_count = 4;
3916 }
3917 }
3918
3919 // VBROADCASTSS
3920 Mnemonic::VBROADCASTSS => {
3921 if let Some(modrm) = modrm {
3922 let dest_reg = get_vector_register(modrm.reg, vex_l, rex_r);
3923 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
3924
3925 if modrm.is_register() {
3926 // Register form broadcasts from XMM register
3927 let src_reg = get_vector_register(modrm.rm, false, rex_b); // Always XMM for source
3928 instr.operands[1] =
3929 Operand::register(src_reg, OperandSize::Size32, OperandAccess::Read);
3930 } else {
3931 // Memory form broadcasts from 32-bit memory location
3932 let sib = instr.sib.map(Sib::from_byte);
3933 let mem = address::build_mem_operand(
3934 &modrm,
3935 sib.as_ref(),
3936 instr.displacement,
3937 instr.prefixes.segment(),
3938 rex_b,
3939 rex_x,
3940 );
3941 instr.operands[1] =
3942 Operand::memory(OperandSize::Size32, OperandAccess::Read, mem);
3943 }
3944 instr.operand_count = 2;
3945 }
3946 }
3947
3948 // FMA instructions: VFMADD132PS, VFMADD213PS, VFMADD231PS, etc.
3949 // All FMA instructions have 3 operands: dest (ModRM.reg), src1 (VEX.vvvv), src2 (ModRM.r/m)
3950 Mnemonic::VFMADD132PS
3951 | Mnemonic::VFMADD213PS
3952 | Mnemonic::VFMADD231PS
3953 | Mnemonic::VFMADD132PD
3954 | Mnemonic::VFMADD213PD
3955 | Mnemonic::VFMADD231PD
3956 | Mnemonic::VFMADD132SS
3957 | Mnemonic::VFMADD213SS
3958 | Mnemonic::VFMADD231SS
3959 | Mnemonic::VFMADD132SD
3960 | Mnemonic::VFMADD213SD
3961 | Mnemonic::VFMADD231SD
3962 | Mnemonic::VFMSUB132PS
3963 | Mnemonic::VFMSUB213PS
3964 | Mnemonic::VFMSUB231PS
3965 | Mnemonic::VFMSUB132PD
3966 | Mnemonic::VFMSUB213PD
3967 | Mnemonic::VFMSUB231PD
3968 | Mnemonic::VFMSUB132SS
3969 | Mnemonic::VFMSUB213SS
3970 | Mnemonic::VFMSUB231SS
3971 | Mnemonic::VFMSUB132SD
3972 | Mnemonic::VFMSUB213SD
3973 | Mnemonic::VFMSUB231SD
3974 | Mnemonic::VFNMADD132PS
3975 | Mnemonic::VFNMADD213PS
3976 | Mnemonic::VFNMADD231PS
3977 | Mnemonic::VFNMADD132PD
3978 | Mnemonic::VFNMADD213PD
3979 | Mnemonic::VFNMADD231PD
3980 | Mnemonic::VFNMADD132SS
3981 | Mnemonic::VFNMADD213SS
3982 | Mnemonic::VFNMADD231SS
3983 | Mnemonic::VFNMADD132SD
3984 | Mnemonic::VFNMADD213SD
3985 | Mnemonic::VFNMADD231SD
3986 | Mnemonic::VFNMSUB132PS
3987 | Mnemonic::VFNMSUB213PS
3988 | Mnemonic::VFNMSUB231PS
3989 | Mnemonic::VFNMSUB132PD
3990 | Mnemonic::VFNMSUB213PD
3991 | Mnemonic::VFNMSUB231PD
3992 | Mnemonic::VFNMSUB132SS
3993 | Mnemonic::VFNMSUB213SS
3994 | Mnemonic::VFNMSUB231SS
3995 | Mnemonic::VFNMSUB132SD
3996 | Mnemonic::VFNMSUB213SD
3997 | Mnemonic::VFNMSUB231SD => {
3998 if let Some(modrm) = modrm {
3999 // VEX.vvvv is the first source operand (inverted encoding)
4000 let src1_reg = get_vector_register(vex.vvvv, vex_l, false);
4001 // ModRM.reg is the destination
4002 let dest_reg = get_vector_register(modrm.reg, vex_l, rex_r);
4003
4004 instr.operands[0] =
4005 Operand::register(dest_reg, op_size, OperandAccess::ReadWrite);
4006 instr.operands[1] = Operand::register(src1_reg, op_size, OperandAccess::Read);
4007
4008 if modrm.is_register() {
4009 let src2_reg = get_vector_register(modrm.rm, vex_l, rex_b);
4010 instr.operands[2] =
4011 Operand::register(src2_reg, op_size, OperandAccess::Read);
4012 } else {
4013 let sib = instr.sib.map(Sib::from_byte);
4014 let mem = address::build_mem_operand(
4015 &modrm,
4016 sib.as_ref(),
4017 instr.displacement,
4018 instr.prefixes.segment(),
4019 rex_b,
4020 rex_x,
4021 );
4022 instr.operands[2] = Operand::memory(op_size, OperandAccess::Read, mem);
4023 }
4024 instr.operand_count = 3;
4025 }
4026 }
4027
4028 _ => {
4029 // Generic VEX operand handling - use default behavior
4030 if let Some(modrm) = modrm {
4031 let dest_reg = get_vector_register(modrm.reg, vex_l, rex_r);
4032 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
4033
4034 if modrm.is_register() {
4035 let src_reg = get_vector_register(modrm.rm, vex_l, rex_b);
4036 instr.operands[1] =
4037 Operand::register(src_reg, op_size, OperandAccess::Read);
4038 } else {
4039 let sib = instr.sib.map(Sib::from_byte);
4040 let mem = address::build_mem_operand(
4041 &modrm,
4042 sib.as_ref(),
4043 instr.displacement,
4044 instr.prefixes.segment(),
4045 rex_b,
4046 rex_x,
4047 );
4048 instr.operands[1] = Operand::memory(op_size, OperandAccess::Read, mem);
4049 }
4050 instr.operand_count = 2;
4051 }
4052 }
4053 }
4054
4055 Ok(())
4056 }
4057
4058 /// Parse EVEX-encoded opcode (AVX-512)
4059 fn parse_evex_opcode(
4060 &self,
4061 bytes: &[u8],
4062 mut offset: usize,
4063 instr: &mut DecodedInstruction,
4064 ) -> Result<usize> {
4065 if offset >= bytes.len() {
4066 return Err(Error::InsufficientBytes);
4067 }
4068
4069 let evex = &instr.prefixes.evex;
4070 let opcode = bytes[offset];
4071 instr.opcode = opcode;
4072 offset += 1;
4073
4074 // Look up EVEX instruction
4075 if let Some(evex_def) = crate::data::evex_instructions::lookup_evex_instruction(
4076 evex.map, opcode, evex.pp, evex.w,
4077 ) {
4078 instr.mnemonic = evex_def.mnemonic;
4079 instr.category = evex_def.category;
4080 instr.isa_set = evex_def.isa_set;
4081 instr.attributes = evex_def.attributes | InstructionAttributes::IS_SIMD;
4082 instr.operand_count = evex_def.operand_count;
4083
4084 // Set operand size based on EVEX.L'L (vector length)
4085 instr.operand_size = match evex.ll {
4086 0 | 1 => 128, // L'L = 00 or 01 = 128-bit (XMM)
4087 2 => 256, // L'L = 10 = 256-bit (YMM)
4088 3 => 512, // L'L = 11 = 512-bit (ZMM)
4089 _ => 128,
4090 };
4091 } else {
4092 // Unknown EVEX instruction
4093 instr.mnemonic = Mnemonic::Invalid;
4094 instr.category = InstructionCategory::Invalid;
4095 instr.isa_set = IsaSet::Avx512F512;
4096 }
4097
4098 Ok(offset)
4099 }
4100
4101 /// Build operands for EVEX-encoded instructions (AVX-512)
4102 fn build_evex_operands(&self, instr: &mut DecodedInstruction) -> Result<()> {
4103 let evex = &instr.prefixes.evex;
4104
4105 // Determine vector length
4106 let (is_512, is_256) = match evex.ll {
4107 3 => (true, false), // 512-bit (ZMM)
4108 2 => (false, true), // 256-bit (YMM)
4109 _ => (false, false), // 128-bit (XMM)
4110 };
4111
4112 // Get ModRM if present
4113 let modrm = instr.modrm.map(ModRm::from_byte);
4114 let rex_r = instr.prefixes.effective_r();
4115 let rex_b = instr.prefixes.effective_b();
4116 let rex_x = instr.prefixes.effective_x();
4117
4118 // Determine operand size based on EVEX.L'L
4119 let op_size = if is_512 {
4120 OperandSize::Size512
4121 } else if is_256 {
4122 OperandSize::Size256
4123 } else {
4124 OperandSize::Size128
4125 };
4126
4127 // Get mask register if used (aaa field)
4128 let _mask_reg = evex.aaa; // 0-7, where 0 means no masking
4129 let _is_zeroing = evex.z; // Zeroing mode if true
4130
4131 match instr.mnemonic {
4132 // AVX-512 ALU instructions: VPADDD, VPADDQ, VPSUBD, VPSUBQ, VPANDD, VPANDQ, VPORD, VPORQ, VPXORD, VPXORQ
4133 // These follow the pattern: dest, src1, src2 {k}{z}
4134 Mnemonic::VPADDD
4135 | Mnemonic::VPADDQ
4136 | Mnemonic::VPSUBD
4137 | Mnemonic::VPSUBQ
4138 | Mnemonic::VPANDD
4139 | Mnemonic::VPANDQ
4140 | Mnemonic::VPORD
4141 | Mnemonic::VPORQ
4142 | Mnemonic::VPXORD
4143 | Mnemonic::VPXORQ
4144 | Mnemonic::VPSLLD
4145 | Mnemonic::VPSLLQ
4146 | Mnemonic::VPSRLD
4147 | Mnemonic::VPSRLQ
4148 | Mnemonic::VPSRAD => {
4149 if let Some(modrm) = modrm {
4150 // EVEX.vvvv is the first source operand (inverted encoding)
4151 let vvvv_reg = get_evex_register(evex.vvvv, is_512, is_256, evex.vp);
4152 // ModRM.reg is the destination
4153 let dest_reg = get_evex_register(modrm.reg, is_512, is_256, rex_r);
4154
4155 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
4156 instr.operands[1] = Operand::register(vvvv_reg, op_size, OperandAccess::Read);
4157
4158 if modrm.is_register() {
4159 let src_reg = get_evex_register(modrm.rm, is_512, is_256, rex_b);
4160 instr.operands[2] =
4161 Operand::register(src_reg, op_size, OperandAccess::Read);
4162 } else {
4163 // Memory operand
4164 let sib = instr.sib.map(Sib::from_byte);
4165 let mem = address::build_mem_operand(
4166 &modrm,
4167 sib.as_ref(),
4168 instr.displacement,
4169 instr.prefixes.segment(),
4170 rex_b,
4171 rex_x,
4172 );
4173 instr.operands[2] = Operand::memory(op_size, OperandAccess::Read, mem);
4174 }
4175 instr.operand_count = 3;
4176 }
4177 }
4178
4179 // AVX-512 Permute instructions
4180 Mnemonic::VPERMD | Mnemonic::VPERMQ => {
4181 if let Some(modrm) = modrm {
4182 let vvvv_reg = get_evex_register(evex.vvvv, is_512, is_256, evex.vp);
4183 let dest_reg = get_evex_register(modrm.reg, is_512, is_256, rex_r);
4184
4185 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
4186 instr.operands[1] = Operand::register(vvvv_reg, op_size, OperandAccess::Read);
4187
4188 if modrm.is_register() {
4189 let src_reg = get_evex_register(modrm.rm, is_512, is_256, rex_b);
4190 instr.operands[2] =
4191 Operand::register(src_reg, op_size, OperandAccess::Read);
4192 } else {
4193 let sib = instr.sib.map(Sib::from_byte);
4194 let mem = address::build_mem_operand(
4195 &modrm,
4196 sib.as_ref(),
4197 instr.displacement,
4198 instr.prefixes.segment(),
4199 rex_b,
4200 rex_x,
4201 );
4202 instr.operands[2] = Operand::memory(op_size, OperandAccess::Read, mem);
4203 }
4204 instr.operand_count = 3;
4205 }
4206 }
4207
4208 // AVX-512 Broadcast instructions
4209 Mnemonic::VPBROADCASTD | Mnemonic::VPBROADCASTQ => {
4210 if let Some(modrm) = modrm {
4211 let dest_reg = get_evex_register(modrm.reg, is_512, is_256, rex_r);
4212 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
4213
4214 // Source is either a GPR or memory
4215 if modrm.is_register() {
4216 // For register form, use general purpose register
4217 let src_reg = if evex.w {
4218 modrm::get_register_64(modrm.rm, rex_b)
4219 } else {
4220 modrm::get_register_32(modrm.rm, rex_b)
4221 };
4222 let src_size = if evex.w {
4223 OperandSize::Size64
4224 } else {
4225 OperandSize::Size32
4226 };
4227 instr.operands[1] =
4228 Operand::register(src_reg, src_size, OperandAccess::Read);
4229 } else {
4230 let sib = instr.sib.map(Sib::from_byte);
4231 let mem = address::build_mem_operand(
4232 &modrm,
4233 sib.as_ref(),
4234 instr.displacement,
4235 instr.prefixes.segment(),
4236 rex_b,
4237 rex_x,
4238 );
4239 let src_size = if evex.w {
4240 OperandSize::Size64
4241 } else {
4242 OperandSize::Size32
4243 };
4244 instr.operands[1] = Operand::memory(src_size, OperandAccess::Read, mem);
4245 }
4246 instr.operand_count = 2;
4247 }
4248 }
4249
4250 // AVX-512 Permute with immediate: VPERMILPS, VPERMILPD, VPERMPD, VPERMPS
4251 Mnemonic::VPERMILPS | Mnemonic::VPERMILPD | Mnemonic::VPERMPD | Mnemonic::VPERMPS => {
4252 if let Some(modrm) = modrm {
4253 let dest_reg = get_evex_register(modrm.reg, is_512, is_256, rex_r);
4254 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
4255
4256 if modrm.is_register() {
4257 let src_reg = get_evex_register(modrm.rm, is_512, is_256, rex_b);
4258 instr.operands[1] =
4259 Operand::register(src_reg, op_size, OperandAccess::Read);
4260 } else {
4261 let sib = instr.sib.map(Sib::from_byte);
4262 let mem = address::build_mem_operand(
4263 &modrm,
4264 sib.as_ref(),
4265 instr.displacement,
4266 instr.prefixes.segment(),
4267 rex_b,
4268 rex_x,
4269 );
4270 instr.operands[1] = Operand::memory(op_size, OperandAccess::Read, mem);
4271 }
4272
4273 // Immediate operand
4274 instr.operands[2] = Operand::immediate(instr.immediate, OperandSize::Size8);
4275 instr.operand_count = 3;
4276 }
4277 }
4278
4279 _ => {
4280 // Generic EVEX operand handling
4281 if let Some(modrm) = modrm {
4282 let dest_reg = get_evex_register(modrm.reg, is_512, is_256, rex_r);
4283 instr.operands[0] = Operand::register(dest_reg, op_size, OperandAccess::Write);
4284
4285 if modrm.is_register() {
4286 let src_reg = get_evex_register(modrm.rm, is_512, is_256, rex_b);
4287 instr.operands[1] =
4288 Operand::register(src_reg, op_size, OperandAccess::Read);
4289 } else {
4290 let sib = instr.sib.map(Sib::from_byte);
4291 let mem = address::build_mem_operand(
4292 &modrm,
4293 sib.as_ref(),
4294 instr.displacement,
4295 instr.prefixes.segment(),
4296 rex_b,
4297 rex_x,
4298 );
4299 instr.operands[1] = Operand::memory(op_size, OperandAccess::Read, mem);
4300 }
4301 instr.operand_count = 2;
4302 }
4303 }
4304 }
4305
4306 Ok(())
4307 }
4308}
4309
4310/// Get a vector register (XMM or YMM) by index
4311///
4312/// # Arguments
4313/// * `index` - Register index (0-15)
4314/// * `is_256` - If true, return YMM register; otherwise XMM
4315/// * `rex_ext` - REX.R or REX.B extension bit
4316#[must_use]
4317pub const fn get_vector_register(index: u8, is_256: bool, rex_ext: bool) -> Register {
4318 // Combine index with extension bit to get full register number (0-15)
4319 let reg_num = if rex_ext { index + 8 } else { index };
4320
4321 if is_256 {
4322 // YMM registers
4323 match reg_num {
4324 0 => Register::YMM0,
4325 1 => Register::YMM1,
4326 2 => Register::YMM2,
4327 3 => Register::YMM3,
4328 4 => Register::YMM4,
4329 5 => Register::YMM5,
4330 6 => Register::YMM6,
4331 7 => Register::YMM7,
4332 8 => Register::YMM8,
4333 9 => Register::YMM9,
4334 10 => Register::YMM10,
4335 11 => Register::YMM11,
4336 12 => Register::YMM12,
4337 13 => Register::YMM13,
4338 14 => Register::YMM14,
4339 15 => Register::YMM15,
4340 _ => Register::None,
4341 }
4342 } else {
4343 // XMM registers
4344 match reg_num {
4345 0 => Register::XMM0,
4346 1 => Register::XMM1,
4347 2 => Register::XMM2,
4348 3 => Register::XMM3,
4349 4 => Register::XMM4,
4350 5 => Register::XMM5,
4351 6 => Register::XMM6,
4352 7 => Register::XMM7,
4353 8 => Register::XMM8,
4354 9 => Register::XMM9,
4355 10 => Register::XMM10,
4356 11 => Register::XMM11,
4357 12 => Register::XMM12,
4358 13 => Register::XMM13,
4359 14 => Register::XMM14,
4360 15 => Register::XMM15,
4361 _ => Register::None,
4362 }
4363 }
4364}
4365
4366/// Get an EVEX vector register (XMM, YMM, or ZMM) by index
4367///
4368/// # Arguments
4369/// * `index` - Register index (0-15) from ModRM or vvvv field
4370/// * `is_512` - If true, return ZMM register
4371/// * `is_256` - If true and not 512, return YMM register; otherwise XMM
4372/// * `ext` - REX.R, REX.B, or V'/R' extension bit from EVEX (true = high 16 registers)
4373#[must_use]
4374pub const fn get_evex_register(index: u8, is_512: bool, is_256: bool, ext: bool) -> Register {
4375 // Combine index with extension bit to get full register number (0-31)
4376 // When ext is true, we access the upper 16 registers (16-31)
4377 let reg_num = if ext { index + 16 } else { index };
4378
4379 if is_512 {
4380 // ZMM registers (512-bit)
4381 match reg_num {
4382 0 => Register::ZMM0,
4383 1 => Register::ZMM1,
4384 2 => Register::ZMM2,
4385 3 => Register::ZMM3,
4386 4 => Register::ZMM4,
4387 5 => Register::ZMM5,
4388 6 => Register::ZMM6,
4389 7 => Register::ZMM7,
4390 8 => Register::ZMM8,
4391 9 => Register::ZMM9,
4392 10 => Register::ZMM10,
4393 11 => Register::ZMM11,
4394 12 => Register::ZMM12,
4395 13 => Register::ZMM13,
4396 14 => Register::ZMM14,
4397 15 => Register::ZMM15,
4398 16 => Register::ZMM16,
4399 17 => Register::ZMM17,
4400 18 => Register::ZMM18,
4401 19 => Register::ZMM19,
4402 20 => Register::ZMM20,
4403 21 => Register::ZMM21,
4404 22 => Register::ZMM22,
4405 23 => Register::ZMM23,
4406 24 => Register::ZMM24,
4407 25 => Register::ZMM25,
4408 26 => Register::ZMM26,
4409 27 => Register::ZMM27,
4410 28 => Register::ZMM28,
4411 29 => Register::ZMM29,
4412 30 => Register::ZMM30,
4413 31 => Register::ZMM31,
4414 _ => Register::None,
4415 }
4416 } else if is_256 {
4417 // YMM registers (256-bit)
4418 match reg_num {
4419 0 => Register::YMM0,
4420 1 => Register::YMM1,
4421 2 => Register::YMM2,
4422 3 => Register::YMM3,
4423 4 => Register::YMM4,
4424 5 => Register::YMM5,
4425 6 => Register::YMM6,
4426 7 => Register::YMM7,
4427 8 => Register::YMM8,
4428 9 => Register::YMM9,
4429 10 => Register::YMM10,
4430 11 => Register::YMM11,
4431 12 => Register::YMM12,
4432 13 => Register::YMM13,
4433 14 => Register::YMM14,
4434 15 => Register::YMM15,
4435 16 => Register::YMM16,
4436 17 => Register::YMM17,
4437 18 => Register::YMM18,
4438 19 => Register::YMM19,
4439 20 => Register::YMM20,
4440 21 => Register::YMM21,
4441 22 => Register::YMM22,
4442 23 => Register::YMM23,
4443 24 => Register::YMM24,
4444 25 => Register::YMM25,
4445 26 => Register::YMM26,
4446 27 => Register::YMM27,
4447 28 => Register::YMM28,
4448 29 => Register::YMM29,
4449 30 => Register::YMM30,
4450 31 => Register::YMM31,
4451 _ => Register::None,
4452 }
4453 } else {
4454 // XMM registers (128-bit)
4455 match reg_num {
4456 0 => Register::XMM0,
4457 1 => Register::XMM1,
4458 2 => Register::XMM2,
4459 3 => Register::XMM3,
4460 4 => Register::XMM4,
4461 5 => Register::XMM5,
4462 6 => Register::XMM6,
4463 7 => Register::XMM7,
4464 8 => Register::XMM8,
4465 9 => Register::XMM9,
4466 10 => Register::XMM10,
4467 11 => Register::XMM11,
4468 12 => Register::XMM12,
4469 13 => Register::XMM13,
4470 14 => Register::XMM14,
4471 15 => Register::XMM15,
4472 16 => Register::XMM16,
4473 17 => Register::XMM17,
4474 18 => Register::XMM18,
4475 19 => Register::XMM19,
4476 20 => Register::XMM20,
4477 21 => Register::XMM21,
4478 22 => Register::XMM22,
4479 23 => Register::XMM23,
4480 24 => Register::XMM24,
4481 25 => Register::XMM25,
4482 26 => Register::XMM26,
4483 27 => Register::XMM27,
4484 28 => Register::XMM28,
4485 29 => Register::XMM29,
4486 30 => Register::XMM30,
4487 31 => Register::XMM31,
4488 _ => Register::None,
4489 }
4490 }
4491}
4492
4493impl Default for Decoder {
4494 fn default() -> Self {
4495 Self::new_64bit()
4496 }
4497}