use iced_x86::{Code, Encoder, Instruction, Register};
use crate::Bitness;
/// Errors produced while turning Intel-syntax text into machine code.
#[derive(Debug, thiserror::Error)]
pub enum AssembleError {
/// The input was empty once whitespace had been normalized away.
#[error("empty text — assembler needs at least a mnemonic")]
Empty,
/// The text did not match any instruction form this module can build;
/// `form` carries the text that was being parsed.
#[error("unsupported instruction form {form:?}")]
Unsupported { form: String },
/// iced rejected the instruction while it was being constructed or
/// encoded; `message` is the `Debug` rendering of the underlying error.
#[error("iced encode failed: {message}")]
EncodeFailed { message: String },
}
/// Assembles a single Intel-syntax instruction into its encoded bytes.
///
/// `text` is normalized (case folded, whitespace collapsed) before parsing,
/// and `rip` is the address the instruction is encoded at.
///
/// # Errors
///
/// * [`AssembleError::Empty`] when `text` normalizes to an empty string.
/// * [`AssembleError::Unsupported`] when the text is not a recognized form.
/// * [`AssembleError::EncodeFailed`] when iced cannot build or encode it.
pub fn assemble_intel(bitness: Bitness, text: &str, rip: u64) -> Result<Vec<u8>, AssembleError> {
    let canonical = normalize(text);
    if canonical.is_empty() {
        Err(AssembleError::Empty)
    } else {
        parse_text(&canonical, bitness).and_then(|insn| encode_insn(&insn, bitness, rip))
    }
}
/// Canonicalizes instruction text: lowercases every character, trims leading
/// and trailing ASCII whitespace, and collapses each interior run of ASCII
/// whitespace into a single space.
fn normalize(text: &str) -> String {
    let mut canonical = String::with_capacity(text.len());
    for word in text.split_ascii_whitespace() {
        if !canonical.is_empty() {
            canonical.push(' ');
        }
        // Lowercase character by character (not `str::to_lowercase`) so no
        // context-sensitive case mapping is applied.
        canonical.extend(word.chars().flat_map(char::to_lowercase));
    }
    canonical
}
/// Splits normalized text into `(mnemonic, operands)`.
///
/// A leading `"notrack "` prefix is discarded first. The mnemonic is
/// everything up to the first space; the operand text has its leading
/// whitespace removed and is empty when there is no space at all.
fn split_mnemonic_and_rest(s: &str) -> (&str, &str) {
    let body = match s.strip_prefix("notrack ") {
        Some(after_prefix) => after_prefix,
        None => s,
    };
    body.split_once(' ')
        .map_or((body, ""), |(mnemonic, rest)| (mnemonic, rest.trim_start()))
}
fn parse_text(text: &str, bitness: Bitness) -> Result<Instruction, AssembleError> {
let (mnemonic, operands) = split_mnemonic_and_rest(text);
if operands.is_empty() {
let code = zero_operand_code(mnemonic).ok_or_else(|| AssembleError::Unsupported {
form: text.to_string(),
})?;
return Ok(Instruction::with(code));
}
let ops = split_operands(operands);
match ops.as_slice() {
[a] => parse_single_operand(mnemonic, a, bitness, text),
[a, b] => parse_two_operand(mnemonic, a, b, bitness, text),
_ => Err(AssembleError::Unsupported {
form: text.to_string(),
}),
}
}
/// Splits an operand list on commas that are not inside square brackets.
///
/// Each piece is trimmed. The result always has at least one element (an
/// empty input yields a single empty piece), and a trailing comma yields a
/// trailing empty piece.
fn split_operands(s: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut bracket_depth = 0i32;
    let mut piece_start = 0usize;

    // The delimiters are all ASCII, so byte offsets found here are always
    // valid `str` slice boundaries.
    for (pos, byte) in s.bytes().enumerate() {
        if byte == b'[' {
            bracket_depth += 1;
        } else if byte == b']' {
            bracket_depth -= 1;
        } else if byte == b',' && bracket_depth == 0 {
            pieces.push(s[piece_start..pos].trim());
            piece_start = pos + 1;
        }
    }

    pieces.push(s[piece_start..].trim());
    pieces
}
/// Builds an instruction from a mnemonic with exactly one operand.
///
/// The operand is tried, in order, as a register, an immediate and a memory
/// reference:
///
/// * register: `push`, `pop`, `call`, `jmp`
/// * immediate: `push`
/// * memory: `call`, `jmp`, `push` (explicit size keyword required) and
///   `nop` (no size keyword)
///
/// `text` is the full instruction text and is only used for error reporting.
///
/// # Errors
///
/// Returns [`AssembleError::Unsupported`] for any other combination,
/// including a `push` immediate that does not fit the operand size of the
/// current mode, and [`AssembleError::EncodeFailed`] when iced refuses to
/// build the instruction.
fn parse_single_operand(
    mnemonic: &str,
    operand: &str,
    bitness: Bitness,
    text: &str,
) -> Result<Instruction, AssembleError> {
    /// Wraps an iced construction error in `AssembleError::EncodeFailed`.
    fn encode_failed<E: std::fmt::Debug>(err: E) -> AssembleError {
        AssembleError::EncodeFailed {
            message: format!("{err:?}"),
        }
    }

    if let Some(reg) = parse_register(operand) {
        let code = match mnemonic {
            "push" => match register_width(reg) {
                Some(64) => Code::Push_r64,
                Some(32) => Code::Push_r32,
                Some(16) => Code::Push_r16,
                _ => return unsupported(text),
            },
            "pop" => match register_width(reg) {
                Some(64) => Code::Pop_r64,
                Some(32) => Code::Pop_r32,
                Some(16) => Code::Pop_r16,
                _ => return unsupported(text),
            },
            "call" => match (bitness, register_width(reg)) {
                (Bitness::Bits64, Some(64)) => Code::Call_rm64,
                (Bitness::Bits32 | Bitness::Bits16, Some(32)) => Code::Call_rm32,
                _ => return unsupported(text),
            },
            "jmp" => match (bitness, register_width(reg)) {
                (Bitness::Bits64, Some(64)) => Code::Jmp_rm64,
                (Bitness::Bits32 | Bitness::Bits16, Some(32)) => Code::Jmp_rm32,
                _ => return unsupported(text),
            },
            _ => return unsupported(text),
        };
        return Instruction::with1(code, reg).map_err(encode_failed);
    }

    if let Some(imm) = parse_immediate(operand) {
        if mnemonic != "push" {
            return unsupported(text);
        }
        // Choose the short (sign-extended imm8) and long encodings for the
        // mode's default stack operand size, together with the range of
        // values the long form can represent. In 64-bit mode the imm32 is
        // sign-extended, so only the signed 32-bit range is expressible; in
        // 32- and 16-bit mode the full unsigned range is meaningful too.
        let (short_code, long_code, representable) = match bitness {
            Bitness::Bits64 => (
                Code::Pushq_imm8,
                Code::Pushq_imm32,
                i64::from(i32::MIN)..=i64::from(i32::MAX),
            ),
            Bitness::Bits32 => (
                Code::Pushd_imm8,
                Code::Pushd_imm32,
                i64::from(i32::MIN)..=i64::from(u32::MAX),
            ),
            // Remaining mode: 16-bit.
            _ => (
                Code::Pushw_imm8,
                Code::Push_imm16,
                i64::from(i16::MIN)..=i64::from(u16::MAX),
            ),
        };
        // Reject rather than silently truncate an immediate that cannot be
        // encoded at this operand size.
        if !representable.contains(&imm) {
            return unsupported(text);
        }
        let code = if (-128..=127).contains(&imm) {
            short_code
        } else {
            long_code
        };
        // The range check above guarantees this cast at most reinterprets an
        // unsigned 32-bit value as its two's-complement `i32` bit pattern.
        let value = imm as i32;
        return Instruction::with1::<i32>(code, value).map_err(encode_failed);
    }

    if let Some(mem) = parse_memory(operand) {
        let code = match mnemonic {
            "call" => match (bitness, mem.size_hint) {
                (Bitness::Bits64, Some(MemSize::Qword)) => Code::Call_rm64,
                (Bitness::Bits32 | Bitness::Bits16, Some(MemSize::Dword)) => Code::Call_rm32,
                _ => return unsupported(text),
            },
            "jmp" => match (bitness, mem.size_hint) {
                (Bitness::Bits64, Some(MemSize::Qword)) => Code::Jmp_rm64,
                (Bitness::Bits32 | Bitness::Bits16, Some(MemSize::Dword)) => Code::Jmp_rm32,
                _ => return unsupported(text),
            },
            "push" => match (bitness, mem.size_hint) {
                (Bitness::Bits64, Some(MemSize::Qword)) => Code::Push_rm64,
                (Bitness::Bits32 | Bitness::Bits16, Some(MemSize::Dword)) => Code::Push_rm32,
                _ => return unsupported(text),
            },
            "nop" => match mem.size_hint {
                Some(MemSize::None) => Code::Nop_rm32,
                _ => return unsupported(text),
            },
            _ => return unsupported(text),
        };
        // In 64-bit mode a bare `[address]` (no base, no index) is treated
        // as a RIP-relative reference to that address.
        let mem_op = if bitness == Bitness::Bits64
            && mem.base == Register::None
            && mem.index == Register::None
        {
            build_rip_relative_operand(mem.displacement)
        } else {
            build_memory_operand(&mem)
        };
        return Instruction::with1::<iced_x86::MemoryOperand>(code, mem_op).map_err(encode_failed);
    }

    unsupported(text)
}
fn parse_two_operand(
mnemonic: &str,
a: &str,
b: &str,
_bitness: Bitness,
text: &str,
) -> Result<Instruction, AssembleError> {
let (Some(ra), Some(rb)) = (parse_register(a), parse_register(b)) else {
return unsupported(text);
};
let wa = register_width(ra).ok_or_else(|| AssembleError::Unsupported {
form: text.to_string(),
})?;
let wb = register_width(rb).ok_or_else(|| AssembleError::Unsupported {
form: text.to_string(),
})?;
if wa != wb {
return unsupported(text);
}
let code = match mnemonic {
"xchg" => {
if ra == rb {
let nop_code = match wa {
16 if ra == Register::AX => Some(Code::Nopw),
32 if ra == Register::EAX => Some(Code::Nopd),
64 if ra == Register::RAX => Some(Code::Nopq),
_ => Option::None,
};
if let Some(c) = nop_code {
return Ok(Instruction::with(c));
}
}
match wa {
64 => Code::Xchg_rm64_r64,
32 => Code::Xchg_rm32_r32,
16 => Code::Xchg_rm16_r16,
8 => Code::Xchg_rm8_r8,
_ => return unsupported(text),
}
}
"mov" => match wa {
64 => Code::Mov_rm64_r64,
32 => Code::Mov_rm32_r32,
16 => Code::Mov_rm16_r16,
8 => Code::Mov_rm8_r8,
_ => return unsupported(text),
},
_ => return unsupported(text),
};
Instruction::with2(code, ra, rb).map_err(|e| AssembleError::EncodeFailed {
message: format!("{e:?}"),
})
}
/// Shorthand for the `Unsupported` error carrying a copy of `text`.
fn unsupported(text: &str) -> Result<Instruction, AssembleError> {
    let form = String::from(text);
    Err(AssembleError::Unsupported { form })
}
/// Parses an integer literal: optional leading `-`, then either decimal
/// digits or `0x` followed by hexadecimal digits.
///
/// Returns `None` for anything else, including an empty body, a magnitude
/// that does not fit in `i64`, or a sign character after the `0x` prefix.
fn parse_immediate(s: &str) -> Option<i64> {
    let (negative, body) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s),
    };

    let magnitude: i64 = if let Some(hex) = body.strip_prefix("0x") {
        // `from_str_radix` accepts a leading `+`/`-` on its own, which
        // would let inputs like `0x-5` through; insist on pure hex digits.
        if hex.is_empty() || !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        i64::from_str_radix(hex, 16).ok()?
    } else if !body.is_empty() && body.bytes().all(|b| b.is_ascii_digit()) {
        body.parse().ok()?
    } else {
        return None;
    };

    // `magnitude` is non-negative here, so negation cannot overflow; the
    // checked form documents that and stays safe if the parsing changes.
    if negative {
        magnitude.checked_neg()
    } else {
        Some(magnitude)
    }
}
/// Operand-size keyword written in front of a memory operand.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MemSize {
/// No size keyword was present.
None,
/// `byte ptr`
Byte,
/// `word ptr`
Word,
/// `dword ptr`
Dword,
/// `qword ptr`
Qword,
}
/// Components of a bracketed memory operand as produced by `parse_memory`.
#[derive(Debug, Clone)]
struct ParsedMemory {
/// Size keyword seen before the brackets; `parse_memory` always stores
/// `Some(..)`, using `MemSize::None` when no keyword was written.
size_hint: Option<MemSize>,
/// Base register, or `Register::None` when the operand has none.
base: Register,
/// Index register, or `Register::None` when the operand has none.
index: Register,
/// Multiplier applied to the index register; 1 unless a `reg*N` term was given.
scale: u32,
/// Signed sum of all immediate terms inside the brackets.
displacement: i64,
}
/// Parses a memory operand of the form `[size ptr] [term (+|-) term ...]`.
///
/// Each term inside the brackets is one of:
///
/// * `reg*N`: the index register with scale `N` (must be 1, 2, 4 or 8)
/// * `reg`: the base register, or the index if a base was already seen
/// * an immediate, accumulated into the displacement
///
/// Returns `None` when the text is not bracketed, a term is unrecognized,
/// more registers are given than base + index allow, a register term is
/// subtracted (x86 addressing cannot express that), the scale is not a
/// valid SIB scale, or the displacement overflows `i64`.
fn parse_memory(s: &str) -> Option<ParsedMemory> {
    let (size_hint, body) = parse_size_prefix(s);
    let inner = body.strip_prefix('[')?.strip_suffix(']')?.trim();

    let mut base = Register::None;
    let mut index = Register::None;
    let mut scale: u32 = 1;
    let mut disp: i64 = 0;

    for (negated, term) in tokenize_addr(inner) {
        let term = term.trim();

        if let Some((reg_part, scale_part)) = term.split_once('*') {
            let reg = parse_register(reg_part.trim())?;
            let factor: u32 = scale_part.trim().parse().ok()?;
            // Only one scaled index is allowed, it cannot be subtracted,
            // and the hardware only encodes scales of 1, 2, 4 and 8.
            if negated || index != Register::None || !matches!(factor, 1 | 2 | 4 | 8) {
                return None;
            }
            index = reg;
            scale = factor;
            continue;
        }

        if let Some(reg) = parse_register(term) {
            // `[rax-rbx]` has no encoding; do not silently read it as
            // `[rax+rbx]`.
            if negated {
                return None;
            }
            if base == Register::None {
                base = reg;
            } else if index == Register::None {
                index = reg;
            } else {
                return None;
            }
            continue;
        }

        if let Some(value) = parse_immediate(term) {
            let signed = if negated { value.checked_neg()? } else { value };
            disp = disp.checked_add(signed)?;
            continue;
        }

        return None;
    }

    Some(ParsedMemory {
        size_hint: Some(size_hint),
        base,
        index,
        scale,
        displacement: disp,
    })
}
/// Strips a leading `qword ptr `, `dword ptr `, `word ptr ` or `byte ptr `
/// keyword and returns the matching size together with the remaining text.
///
/// When no keyword matches, the input is returned unchanged with
/// `MemSize::None`.
fn parse_size_prefix(s: &str) -> (MemSize, &str) {
    const KEYWORDS: [(&str, MemSize); 4] = [
        ("qword ptr ", MemSize::Qword),
        ("dword ptr ", MemSize::Dword),
        ("word ptr ", MemSize::Word),
        ("byte ptr ", MemSize::Byte),
    ];

    KEYWORDS
        .iter()
        .find_map(|&(keyword, size)| s.strip_prefix(keyword).map(|rest| (size, rest)))
        .unwrap_or((MemSize::None, s))
}
/// Splits the inside of a memory operand into `(negated, term)` pairs.
///
/// A `+` or `-` ends the current term unless it is the very first character
/// of that term, in which case it stays part of the term text. `negated` is
/// `true` when the separator in front of the term was `-`. Terms are not
/// trimmed, and the result always contains at least one entry.
fn tokenize_addr(s: &str) -> Vec<(bool, &str)> {
    let mut terms = Vec::new();
    let mut term_start = 0usize;
    let mut negated = false;

    // `+` and `-` are ASCII, so their byte offsets are valid slice bounds.
    for (pos, byte) in s.bytes().enumerate() {
        let is_sign = matches!(byte, b'+' | b'-');
        if !is_sign || pos <= term_start {
            continue;
        }
        terms.push((negated, &s[term_start..pos]));
        negated = byte == b'-';
        term_start = pos + 1;
    }

    terms.push((negated, &s[term_start..]));
    terms
}
/// Converts a parsed memory reference into an iced `MemoryOperand`.
///
/// The displacement size is derived from the displacement itself: iced
/// reads a size of 0 as "no displacement", so a non-zero displacement must
/// be passed with a non-zero size. A size of 1 asks the encoder to use an
/// 8-bit displacement when the value fits and the full address-size
/// displacement otherwise.
fn build_memory_operand(mem: &ParsedMemory) -> iced_x86::MemoryOperand {
    use iced_x86::MemoryOperand;

    let displ_size = if mem.displacement == 0 { 0 } else { 1 };
    MemoryOperand::with_base_index_scale_displ_size(
        mem.base,
        mem.index,
        mem.scale,
        mem.displacement,
        displ_size,
    )
}
fn build_rip_relative_operand(target: i64) -> iced_x86::MemoryOperand {
use iced_x86::MemoryOperand;
MemoryOperand::with_base_index_scale_displ_size(
Register::RIP,
Register::None,
1,
target,
4,
)
}
/// Maps a lowercase register name to its iced `Register`.
///
/// Covers the 64/32/16/8-bit general-purpose registers (the 8-bit extended
/// registers are accepted with either a `b` or an `l` suffix) and
/// `xmm0`..`xmm15`. Unknown names yield `None`.
#[allow(clippy::enum_glob_use)]
fn parse_register(name: &str) -> Option<Register> {
    use Register::*;

    const REGISTER_NAMES: &[(&str, Register)] = &[
        // 64-bit general purpose
        ("rax", RAX),
        ("rcx", RCX),
        ("rdx", RDX),
        ("rbx", RBX),
        ("rsp", RSP),
        ("rbp", RBP),
        ("rsi", RSI),
        ("rdi", RDI),
        ("r8", R8),
        ("r9", R9),
        ("r10", R10),
        ("r11", R11),
        ("r12", R12),
        ("r13", R13),
        ("r14", R14),
        ("r15", R15),
        // 32-bit general purpose
        ("eax", EAX),
        ("ecx", ECX),
        ("edx", EDX),
        ("ebx", EBX),
        ("esp", ESP),
        ("ebp", EBP),
        ("esi", ESI),
        ("edi", EDI),
        ("r8d", R8D),
        ("r9d", R9D),
        ("r10d", R10D),
        ("r11d", R11D),
        ("r12d", R12D),
        ("r13d", R13D),
        ("r14d", R14D),
        ("r15d", R15D),
        // 16-bit general purpose
        ("ax", AX),
        ("cx", CX),
        ("dx", DX),
        ("bx", BX),
        ("sp", SP),
        ("bp", BP),
        ("si", SI),
        ("di", DI),
        ("r8w", R8W),
        ("r9w", R9W),
        ("r10w", R10W),
        ("r11w", R11W),
        ("r12w", R12W),
        ("r13w", R13W),
        ("r14w", R14W),
        ("r15w", R15W),
        // 8-bit general purpose
        ("al", AL),
        ("cl", CL),
        ("dl", DL),
        ("bl", BL),
        ("ah", AH),
        ("ch", CH),
        ("dh", DH),
        ("bh", BH),
        ("spl", SPL),
        ("bpl", BPL),
        ("sil", SIL),
        ("dil", DIL),
        ("r8b", R8L),
        ("r8l", R8L),
        ("r9b", R9L),
        ("r9l", R9L),
        ("r10b", R10L),
        ("r10l", R10L),
        ("r11b", R11L),
        ("r11l", R11L),
        ("r12b", R12L),
        ("r12l", R12L),
        ("r13b", R13L),
        ("r13l", R13L),
        ("r14b", R14L),
        ("r14l", R14L),
        ("r15b", R15L),
        ("r15l", R15L),
        // SSE
        ("xmm0", XMM0),
        ("xmm1", XMM1),
        ("xmm2", XMM2),
        ("xmm3", XMM3),
        ("xmm4", XMM4),
        ("xmm5", XMM5),
        ("xmm6", XMM6),
        ("xmm7", XMM7),
        ("xmm8", XMM8),
        ("xmm9", XMM9),
        ("xmm10", XMM10),
        ("xmm11", XMM11),
        ("xmm12", XMM12),
        ("xmm13", XMM13),
        ("xmm14", XMM14),
        ("xmm15", XMM15),
    ];

    REGISTER_NAMES
        .iter()
        .find(|entry| entry.0 == name)
        .map(|entry| entry.1)
}
/// Returns the width in bits (64, 32, 16 or 8) of a general-purpose
/// register, or `None` for any other register.
#[allow(clippy::enum_glob_use)]
fn register_width(reg: Register) -> Option<u32> {
    use Register::*;

    const GPR64: &[Register] = &[
        RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
    ];
    const GPR32: &[Register] = &[
        EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D,
    ];
    const GPR16: &[Register] = &[
        AX, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W,
    ];
    const GPR8: &[Register] = &[
        AL, CL, DL, BL, AH, CH, DH, BH, SPL, BPL, SIL, DIL, R8L, R9L, R10L, R11L, R12L, R13L,
        R14L, R15L,
    ];

    [(64u32, GPR64), (32, GPR32), (16, GPR16), (8, GPR8)]
        .into_iter()
        .find(|(_, group)| group.contains(&reg))
        .map(|(bits, _)| bits)
}
/// Looks up the iced `Code` for a mnemonic that takes no operands.
///
/// Returns `None` when the mnemonic is not in the table.
fn zero_operand_code(mnemonic: &str) -> Option<Code> {
    const ZERO_OPERAND: &[(&str, Code)] = &[
        ("endbr64", Code::Endbr64),
        ("endbr32", Code::Endbr32),
        ("hlt", Code::Hlt),
        ("nop", Code::Nopd),
        ("int3", Code::Int3),
        ("ret", Code::Retnq),
        ("retq", Code::Retnq),
        ("retn", Code::Retnd),
        ("cdqe", Code::Cdqe),
        ("cwde", Code::Cwde),
        ("cbw", Code::Cbw),
        ("leave", Code::Leaveq),
        ("leaveq", Code::Leaveq),
        ("syscall", Code::Syscall),
        ("ud2", Code::Ud2),
        ("pause", Code::Pause),
        ("rdtsc", Code::Rdtsc),
        ("cpuid", Code::Cpuid),
    ];

    ZERO_OPERAND
        .iter()
        .find(|entry| entry.0 == mnemonic)
        .map(|entry| entry.1)
}
/// Encodes `insn` at address `rip` with a fresh encoder for `bitness` and
/// returns the bytes it produced.
///
/// An encoder failure is reported as `AssembleError::EncodeFailed` carrying
/// the `Debug` rendering of the iced error.
fn encode_insn(insn: &Instruction, bitness: Bitness, rip: u64) -> Result<Vec<u8>, AssembleError> {
    let mut enc = Encoder::new(bitness.as_u32());
    match enc.encode(insn, rip) {
        Ok(_) => Ok(enc.take_buffer()),
        Err(err) => Err(AssembleError::EncodeFailed {
            message: format!("{err:?}"),
        }),
    }
}
// Unit tests: most cases assemble text, compare against known bytes, then
// decode and re-format the bytes to confirm the text round-trips.
#[cfg(test)]
mod tests {
use super::*;
use crate::{decode, format_intel};
// Assembles `text` at address 0x1000, checks the bytes equal `expected`,
// then decodes those bytes and checks that exactly one instruction comes
// back whose formatted text normalizes to the same string as `text`.
#[track_caller]
fn round_trip(bitness: Bitness, text: &str, expected: &[u8]) {
let bytes = assemble_intel(bitness, text, 0x1000)
.unwrap_or_else(|e| panic!("assemble {text:?}: {e}"));
assert_eq!(
bytes, expected,
"bytes mismatch for {text:?}: got {bytes:02x?}, want {expected:02x?}"
);
let insns = decode(bitness, &bytes, 0x1000).expect("decode round-trip");
assert_eq!(insns.len(), 1, "expected single instruction");
let canonical = format_intel(&insns[0].iced);
assert_eq!(
normalize(&canonical),
normalize(text),
"canonical text diverges"
);
}
// Mnemonics without operands, 64-bit mode.
#[test]
fn zero_operand_x86_64() {
round_trip(Bitness::Bits64, "endbr64", &[0xf3, 0x0f, 0x1e, 0xfa]);
round_trip(Bitness::Bits64, "hlt", &[0xf4]);
round_trip(Bitness::Bits64, "nop", &[0x90]);
round_trip(Bitness::Bits64, "int3", &[0xcc]);
round_trip(Bitness::Bits64, "ret", &[0xc3]);
round_trip(Bitness::Bits64, "cdqe", &[0x48, 0x98]);
round_trip(Bitness::Bits64, "leave", &[0xc9]);
round_trip(Bitness::Bits64, "syscall", &[0x0f, 0x05]);
round_trip(Bitness::Bits64, "ud2", &[0x0f, 0x0b]);
}
// Upper-case input with surrounding whitespace assembles like the canonical form.
#[test]
fn zero_operand_normalizes_case_and_whitespace() {
round_trip(Bitness::Bits64, " ENDBR64 ", &[0xf3, 0x0f, 0x1e, 0xfa]);
}
// An unrecognized mnemonic is reported as Unsupported.
#[test]
fn unknown_mnemonic_returns_unsupported() {
match assemble_intel(Bitness::Bits64, "completely-fake-insn", 0x1000) {
Err(AssembleError::Unsupported { .. }) => {}
other => panic!("expected Unsupported, got {other:?}"),
}
}
// Empty input is reported as Empty.
#[test]
fn empty_text_returns_empty_error() {
match assemble_intel(Bitness::Bits64, "", 0x1000) {
Err(AssembleError::Empty) => {}
other => panic!("expected Empty, got {other:?}"),
}
}
// push/pop of 64-bit registers; r8..r15 carry an extra 0x41 prefix byte.
#[test]
fn push_pop_register_x86_64() {
round_trip(Bitness::Bits64, "push rax", &[0x50]);
round_trip(Bitness::Bits64, "push rdi", &[0x57]);
round_trip(Bitness::Bits64, "push r8", &[0x41, 0x50]);
round_trip(Bitness::Bits64, "push r15", &[0x41, 0x57]);
round_trip(Bitness::Bits64, "pop rax", &[0x58]);
round_trip(Bitness::Bits64, "pop r12", &[0x41, 0x5c]);
}
// Register-to-register xchg, including a same-register pair that is not the accumulator.
#[test]
fn xchg_register_register_x86_64() {
round_trip(Bitness::Bits64, "xchg rcx,rcx", &[0x48, 0x87, 0xc9]);
round_trip(Bitness::Bits64, "xchg eax,edx", &[0x87, 0xd0]);
}
// Register-to-register mov at 64- and 32-bit widths.
#[test]
fn mov_register_register_x86_64() {
round_trip(Bitness::Bits64, "mov rax,rbx", &[0x48, 0x89, 0xd8]);
round_trip(Bitness::Bits64, "mov eax,edx", &[0x89, 0xd0]);
round_trip(Bitness::Bits64, "mov r8,r15", &[0x4d, 0x89, 0xf8]);
}
// Indirect call/jmp through a 64-bit register.
#[test]
fn call_jmp_register_x86_64() {
round_trip(Bitness::Bits64, "call rax", &[0xff, 0xd0]);
round_trip(Bitness::Bits64, "jmp rax", &[0xff, 0xe0]);
round_trip(Bitness::Bits64, "call r12", &[0x41, 0xff, 0xd4]);
round_trip(Bitness::Bits64, "jmp r12", &[0x41, 0xff, 0xe4]);
}
// Small push immediates use the one-byte immediate form (opcode 0x6a).
#[test]
fn push_immediate_x86_64() {
round_trip(Bitness::Bits64, "push 0", &[0x6a, 0x00]);
round_trip(Bitness::Bits64, "push 1", &[0x6a, 0x01]);
}
// nop with a memory operand and no size keyword.
#[test]
fn nop_with_memory_operand_x86_64() {
round_trip(Bitness::Bits64, "nop [rax]", &[0x0f, 0x1f, 0x00]);
round_trip(Bitness::Bits64, "nop [rax+rax]", &[0x0f, 0x1f, 0x04, 0x00]);
}
// Two-operand forms with a memory operand are rejected rather than mis-assembled.
#[test]
fn mov_mem_operand_is_still_unsupported() {
match assemble_intel(Bitness::Bits64, "mov rax,[rbx]", 0x1000) {
Err(AssembleError::Unsupported { .. }) => {}
other => panic!("expected Unsupported, got {other:?}"),
}
}
// xchg of the accumulator with itself encodes as a prefixed 0x90.
#[test]
fn xchg_same_register_emits_nop_variant() {
round_trip(Bitness::Bits64, "xchg ax,ax", &[0x66, 0x90]);
round_trip(Bitness::Bits64, "xchg rax,rax", &[0x48, 0x90]);
}
// A bare `[address]` operand in 64-bit mode: the expected 4-byte
// displacement equals address - (0x1030 + 6), e.g. 0x3fc0 - 0x1036 = 0x2f8a.
#[test]
fn rip_relative_indirect_jmp_call_push_x86_64() {
let bytes = assemble_intel(Bitness::Bits64, "jmp qword ptr [0x3fc0]", 0x1030).unwrap();
assert_eq!(bytes, vec![0xff, 0x25, 0x8a, 0x2f, 0x00, 0x00]);
let bytes = assemble_intel(Bitness::Bits64, "call qword ptr [0x3fd8]", 0x1030).unwrap();
assert_eq!(bytes, vec![0xff, 0x15, 0xa2, 0x2f, 0x00, 0x00]);
let bytes = assemble_intel(Bitness::Bits64, "push qword ptr [0x3fb8]", 0x1030).unwrap();
assert_eq!(bytes, vec![0xff, 0x35, 0x82, 0x2f, 0x00, 0x00]);
}
}