/// Encodes a textual `test …` or `cmp …` instruction into machine-code bytes.
///
/// Leading and trailing whitespace is ignored. The mnemonic must be followed
/// by a space; the remaining text is handed to the matching operand encoder.
/// Returns `None` for any other mnemonic or for operands the encoder rejects.
#[must_use]
pub fn encode_cmp_or_test(text: &str) -> Option<Vec<u8>> {
    let instruction = text.trim();
    match instruction.strip_prefix("test ") {
        // A `test` mnemonic never falls through to the `cmp` path.
        Some(operands) => encode_test(operands.trim()),
        None => instruction
            .strip_prefix("cmp ")
            .and_then(|operands| encode_cmp(operands.trim())),
    }
}
/// Encodes the leading part of a condition text, i.e. everything before the
/// first `;`.
///
/// The head is first tried as a relational expression (for example
/// `eax == 0`) and then as a literal `cmp`/`test` instruction. Returns `None`
/// when the head is blank or neither form can be encoded.
#[must_use]
pub fn encode_head_from_cond_text(cond_text: &str) -> Option<Vec<u8>> {
    let head = cond_text
        .split(';')
        .next()
        .map(str::trim)
        .filter(|segment| !segment.is_empty())?;
    encode_relational(head).or_else(|| encode_cmp_or_test(head))
}
/// Encodes a relational expression such as `eax == 0` or `esi <u [ebp-4]`.
///
/// When the operator has no `u` suffix and the right-hand side is the literal
/// text `0`, a register on the left is encoded as `test reg,reg` (opcode
/// `0x85` for 32-bit registers, `0x84` for 8-bit ones). Every other
/// combination is forwarded to `encode_cmp` as `lhs,rhs`.
fn encode_relational(text: &str) -> Option<Vec<u8>> {
    let (lhs, op, rhs) = split_relational(text)?;
    let compares_with_zero = rhs == "0" && !op.ends_with('u');
    if compares_with_zero {
        // Prefer the 32-bit register name, then fall back to the 8-bit one.
        let self_test = parse_reg32(lhs)
            .map(|r| (0x85u8, r))
            .or_else(|| parse_reg8(lhs).map(|r| (0x84u8, r)));
        if let Some((opcode, r)) = self_test {
            return Some(vec![opcode, mod_rm_reg_reg(r, r)]);
        }
    }
    encode_cmp(&format!("{lhs},{rhs}"))
}
/// Splits `text` at its first top-level relational operator.
///
/// Returns `(lhs, op, rhs)` with both operands trimmed, or `None` when no
/// operator with a non-empty operand on each side is found. Operators nested
/// inside `(...)` or `[...]` are ignored.
///
/// The scan works on bytes rather than on `str` slices, so input containing
/// multi-byte UTF-8 characters is handled without slicing inside a character
/// (which would panic).
fn split_relational(text: &str) -> Option<(&str, &str, &str)> {
    // Longer spellings come first so `<=u` is never reported as `<=` or `<`.
    const OPS: &[&str] = &["<=u", ">=u", "<u", ">u", "==", "!=", "<=", ">=", "<", ">"];
    let bytes = text.as_bytes();
    let mut depth = 0i32;
    for (i, &byte) in bytes.iter().enumerate() {
        match byte {
            b'(' | b'[' => {
                depth += 1;
                continue;
            }
            b')' | b']' => {
                depth -= 1;
                continue;
            }
            _ => {}
        }
        if depth != 0 {
            continue;
        }
        for &op in OPS {
            if !bytes[i..].starts_with(op.as_bytes()) {
                continue;
            }
            // Every operator is pure ASCII, so `i` and `i + op.len()` are
            // guaranteed to be character boundaries once the bytes match.
            let lhs = text[..i].trim();
            let rhs = text[i + op.len()..].trim();
            if !lhs.is_empty() && !rhs.is_empty() {
                return Some((lhs, op, rhs));
            }
        }
    }
    None
}
/// Encodes `test a,b` for two registers of the same width.
///
/// 32-bit pairs use opcode `0x85`, 8-bit pairs use `0x84`. The second operand
/// goes into the ModR/M `reg` field and the first into `rm`. Any other operand
/// combination yields `None`.
fn encode_test(operands: &str) -> Option<Vec<u8>> {
    let (first, second) = split_two(operands)?;
    let dword_pair = parse_reg32(first)
        .zip(parse_reg32(second))
        .map(|pair| (0x85u8, pair));
    let byte_pair = || {
        parse_reg8(first)
            .zip(parse_reg8(second))
            .map(|pair| (0x84u8, pair))
    };
    let (opcode, (rm, reg)) = dword_pair.or_else(byte_pair)?;
    Some(vec![opcode, mod_rm_reg_reg(reg, rm)])
}
/// Encodes `cmp a,b` from its comma-separated operand text.
///
/// Supported operand shapes, tried in this order:
/// 1. 8-bit register, 8-bit register (opcode `0x3A`)
/// 2. 32-bit register, 32-bit register (opcode `0x3B`)
/// 3. 32-bit register, integer literal
/// 4. 32-bit register, dword memory operand (opcode `0x3B`)
/// 5. dword memory operand, 32-bit register (opcode `0x39`)
/// 6. dword memory operand, integer literal
///
/// Returns `None` when the operands match none of these shapes or when the
/// memory operand cannot be encoded.
fn encode_cmp(operands: &str) -> Option<Vec<u8>> {
    let (a, b) = split_two(operands)?;
    if let (Some(r1), Some(r2)) = (parse_reg8(a), parse_reg8(b)) {
        return Some(vec![0x3A, mod_rm_reg_reg(r1, r2)]);
    }
    if let (Some(r1), Some(r2)) = (parse_reg32(a), parse_reg32(b)) {
        return Some(vec![0x3B, mod_rm_reg_reg(r1, r2)]);
    }
    if let (Some(reg), Some(imm)) = (parse_reg32(a), parse_int_literal(b)) {
        return Some(encode_cmp_reg_imm(reg, imm, a == "eax"));
    }
    if let (Some(reg), Some(mem)) = (parse_reg32(a), parse_mem_dword(b)) {
        return encode_modrm_mem(0x3B, reg, mem);
    }
    if let (Some(mem), Some(reg)) = (parse_mem_dword(a), parse_reg32(b)) {
        return encode_modrm_mem(0x39, reg, mem);
    }
    if let (Some(mem), Some(imm)) = (parse_mem_dword(a), parse_int_literal(b)) {
        // `encode_cmp_mem_imm` reports an unencodable memory operand with an
        // empty vector. Surface that as `None`, like the other memory forms,
        // instead of handing callers `Some` of a zero-length instruction.
        let bytes = encode_cmp_mem_imm(mem, imm);
        return if bytes.is_empty() { None } else { Some(bytes) };
    }
    None
}
/// A memory operand as understood by the `cmp` encoders in this file.
#[derive(Clone, Copy)]
enum MemOperand {
// `base` is a register number as returned by `parse_reg32`; `disp` is the
// signed displacement added to that register.
BasedDisp { base: u8, disp: i64 },
// A plain 32-bit address with no base register.
Absolute { addr: u32 },
}
/// Builds `opcode` followed by the ModR/M (and displacement) bytes that pair
/// register field `reg` with the memory operand `mem`.
///
/// Returns `None` when `push_mem_modrm` cannot encode `mem`.
fn encode_modrm_mem(opcode: u8, reg: u8, mem: MemOperand) -> Option<Vec<u8>> {
    let mut bytes = vec![opcode];
    push_mem_modrm(&mut bytes, reg, mem).map(|()| bytes)
}
/// Appends the ModR/M byte and any displacement/address bytes for `mem` to
/// `out`, placing `reg_field` in bits 3..=5 of the ModR/M byte.
///
/// Returns `None`, leaving `out` untouched, for a based operand whose base is
/// register number 4 (`esp` in `parse_reg32`).
// NOTE(review): base 4 presumably needs a SIB byte, which this encoder never
// emits — confirm against the ISA manual before lifting the restriction.
fn push_mem_modrm(out: &mut Vec<u8>, reg_field: u8, mem: MemOperand) -> Option<()> {
    let reg_bits = reg_field << 3;
    match mem {
        MemOperand::BasedDisp { base: 4, .. } => return None,
        MemOperand::BasedDisp { base, disp } => {
            let (mod_field, disp_bytes) = mem_mod_and_disp(base, disp);
            out.push((mod_field << 6) | reg_bits | base);
            out.extend_from_slice(&disp_bytes);
        }
        MemOperand::Absolute { addr } => {
            // mod = 00 with r/m = 101, followed by the little-endian address.
            out.push(reg_bits | 0b101);
            out.extend_from_slice(&addr.to_le_bytes());
        }
    }
    Some(())
}
/// Encodes `cmp reg, imm` for a 32-bit register.
///
/// Form selection:
/// * immediate fits in a signed byte: `0x83 /7 ib`
/// * otherwise, `reg_is_eax` and the immediate fits 32 bits: `0x3D id`
/// * otherwise: `0x81 /7 id`
///
/// An immediate that `imm_as_i32` rejects is truncated to its low 32 bits and
/// always uses the `0x81` form.
fn encode_cmp_reg_imm(reg: u8, imm: i64, reg_is_eax: bool) -> Vec<u8> {
    let modrm = mod_rm_op_reg(7, reg);
    let (mut out, imm_bytes) = match imm_as_i32(imm) {
        Some(value) => match i8::try_from(value) {
            Ok(short) => return vec![0x83, modrm, short.to_le_bytes()[0]],
            Err(_) if reg_is_eax => (vec![0x3D], value.to_le_bytes()),
            Err(_) => (vec![0x81, modrm], value.to_le_bytes()),
        },
        None => {
            // Out of range for 32 bits: keep only the low dword.
            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
            let low_dword = imm as u32;
            (vec![0x81, modrm], low_dword.to_le_bytes())
        }
    };
    out.extend_from_slice(&imm_bytes);
    out
}
/// Narrows `v` to the 32-bit pattern it would occupy as an immediate.
///
/// Accepts everything from `i32::MIN` up to `u32::MAX`. Values above
/// `i32::MAX` are reinterpreted as the `i32` with the same low 32 bits
/// (so `0xFFFF_FFFF` becomes `-1`). Anything outside that span yields `None`.
fn imm_as_i32(v: i64) -> Option<i32> {
    let lowest = i64::from(i32::MIN);
    let highest = i64::from(u32::MAX);
    if !(lowest..=highest).contains(&v) {
        return None;
    }
    // Inside the accepted span, keeping the low 32 bits is exact for the
    // signed half and is the intended wrap-around for the unsigned half.
    #[allow(clippy::cast_possible_truncation)]
    let low_bits = v as i32;
    Some(low_bits)
}
/// Encodes `cmp dword ptr mem, imm`.
///
/// Uses `0x83 /7 ib` when the immediate fits in a signed byte and
/// `0x81 /7 id` otherwise; an immediate that `imm_as_i32` rejects is truncated
/// to its low 32 bits. Returns an empty vector when the memory operand cannot
/// be encoded (base register number 4, or `push_mem_modrm` failing).
fn encode_cmp_mem_imm(mem: MemOperand, imm: i64) -> Vec<u8> {
    if matches!(mem, MemOperand::BasedDisp { base: 4, .. }) {
        return Vec::new();
    }
    let narrowed = imm_as_i32(imm);
    let short_form = narrowed.and_then(|value| i8::try_from(value).ok());
    let (opcode, imm_bytes): (u8, Vec<u8>) = if let Some(short) = short_form {
        (0x83, short.to_le_bytes().to_vec())
    } else if let Some(value) = narrowed {
        (0x81, value.to_le_bytes().to_vec())
    } else {
        // Out of range for 32 bits: keep only the low dword.
        #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
        let low_dword = imm as u32;
        (0x81, low_dword.to_le_bytes().to_vec())
    };
    let mut out = vec![opcode];
    match push_mem_modrm(&mut out, 7, mem) {
        Some(()) => {
            out.extend_from_slice(&imm_bytes);
            out
        }
        None => Vec::new(),
    }
}
/// Chooses the ModR/M `mod` field and displacement bytes for a based operand.
///
/// * `0b00`, no bytes: zero displacement, unless the base is register 5
/// * `0b01`, one byte: displacement fits in a signed byte
/// * `0b10`, four little-endian bytes: everything else (low 32 bits of `disp`)
fn mem_mod_and_disp(reg_base: u8, disp: i64) -> (u8, Vec<u8>) {
    // Register 5 never takes the displacement-free form here, so it gets an
    // explicit zero byte instead.
    let needs_displacement = disp != 0 || reg_base == 5;
    if !needs_displacement {
        return (0b00, Vec::new());
    }
    match i8::try_from(disp) {
        Ok(short) => (0b01, short.to_le_bytes().to_vec()),
        Err(_) => {
            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
            let wide = disp as u32;
            (0b10, wide.to_le_bytes().to_vec())
        }
    }
}
/// Builds a ModR/M byte with `mod = 11`, `reg` in bits 3..=5 and `rm` in
/// bits 0..=2.
fn mod_rm_reg_reg(reg: u8, rm: u8) -> u8 {
    let mod_bits: u8 = 0b11 << 6;
    mod_bits | (reg << 3) | rm
}
/// Builds a ModR/M byte with `mod = 11`, the opcode extension `op` in bits
/// 3..=5 and the register `rm` in bits 0..=2.
fn mod_rm_op_reg(op: u8, rm: u8) -> u8 {
    let mod_bits: u8 = 0b11 << 6;
    mod_bits | (op << 3) | rm
}
/// Splits `s` at its first comma and trims both halves.
///
/// Returns `None` when `s` contains no comma. Later commas stay in the second
/// half.
fn split_two(s: &str) -> Option<(&str, &str)> {
    s.split_once(',')
        .map(|(left, right)| (left.trim(), right.trim()))
}
/// Maps a lower-case 32-bit register name to its number (`eax` = 0 through
/// `edi` = 7). Any other text, including upper-case spellings, yields `None`.
fn parse_reg32(s: &str) -> Option<u8> {
    // Listed in register-number order; the position is the number.
    const NAMES: [&str; 8] = ["eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"];
    NAMES
        .iter()
        .zip(0u8..)
        .find(|&(&name, _)| name == s)
        .map(|(_, number)| number)
}
/// Parses a memory operand.
///
/// Accepted spellings:
/// * `arg_<hex>`: register 5 (`ebp`) plus the hex offset
/// * `var_<hex>`: register 5 (`ebp`) minus the hex offset
/// * `[reg]`, `[reg+disp]`, `[reg-disp]`: a 32-bit register base
/// * `[literal]`: an absolute address that fits in 32 bits
///
/// The bracketed forms may be preceded by `dword ptr ` and/or `qword ptr `.
/// Returns `None` for anything else.
// NOTE(review): a `qword ptr` operand is accepted and treated exactly like a
// dword one — confirm that this is intended.
fn parse_mem_dword(s: &str) -> Option<MemOperand> {
    let text = s.trim();

    // Stack-frame style names: a prefix followed by a bare hex offset.
    for &(prefix, below_frame) in &[("arg_", false), ("var_", true)] {
        if let Some(hex) = text.strip_prefix(prefix) {
            let magnitude = i64::from(u32::from_str_radix(hex, 16).ok()?);
            let disp = if below_frame { -magnitude } else { magnitude };
            return Some(MemOperand::BasedDisp { base: 5, disp });
        }
    }

    // Drop optional size prefixes, in the same order as they are listed.
    let mut body = text;
    for size_prefix in &["dword ptr ", "qword ptr "] {
        body = body.strip_prefix(size_prefix).unwrap_or(body).trim();
    }
    let inner = body.strip_prefix('[')?.strip_suffix(']')?.trim();

    let based = split_after_reg(inner)
        .and_then(|(name, tail)| parse_reg32(name).map(|reg| (reg, tail)));
    if let Some((reg, tail)) = based {
        let disp = if tail.is_empty() {
            0i64
        } else {
            parse_int_literal(tail)?
        };
        return Some(MemOperand::BasedDisp { base: reg, disp });
    }

    let addr32 = imm_as_i32(parse_int_literal(inner)?)?;
    #[allow(clippy::cast_sign_loss)]
    let addr = addr32 as u32;
    Some(MemOperand::Absolute { addr })
}
/// Maps a lower-case 8-bit register name to its number (`al` = 0 through
/// `bh` = 7). Any other text yields `None`.
fn parse_reg8(s: &str) -> Option<u8> {
    // Listed in register-number order; the position is the number.
    const NAMES: [&str; 8] = ["al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"];
    NAMES
        .iter()
        .zip(0u8..)
        .find(|&(&name, _)| name == s)
        .map(|(_, number)| number)
}
/// Splits the trimmed input into its leading run of ASCII letters and the
/// (trimmed) remainder.
///
/// Returns `None` when the input does not start with an ASCII letter.
fn split_after_reg(s: &str) -> Option<(&str, &str)> {
    let trimmed = s.trim();
    // ASCII letters are one byte each, so the count is also a byte offset.
    let name_len = trimmed
        .bytes()
        .take_while(u8::is_ascii_alphabetic)
        .count();
    match trimmed.split_at(name_len) {
        ("", _) => None,
        (name, rest) => Some((name, rest.trim())),
    }
}
/// Parses an integer literal in one of three notations:
///
/// * decimal: `42`
/// * `0x`/`0X`-prefixed hexadecimal: `0x2A`
/// * `h`/`H`-suffixed hexadecimal: `2Ah`
///
/// One optional leading `+` or `-` is allowed, and whitespace around the sign
/// and the literal is ignored. Returns `None` for anything else, including a
/// second sign inside the digits (`--5`, `0x-5`).
fn parse_int_literal(s: &str) -> Option<i64> {
    let s = s.trim();
    let (negative, body) = if let Some(rest) = s.strip_prefix('-') {
        (true, rest.trim())
    } else if let Some(rest) = s.strip_prefix('+') {
        (false, rest.trim())
    } else {
        (false, s)
    };
    let (digits, radix) =
        if let Some(hex) = body.strip_prefix("0x").or_else(|| body.strip_prefix("0X")) {
            (hex, 16)
        } else if let Some(hex) = body.strip_suffix('h').or_else(|| body.strip_suffix('H')) {
            (hex, 16)
        } else {
            (body, 10)
        };
    // `from_str_radix` accepts its own leading sign. The sign was already
    // consumed above, so a second one means the literal is malformed; letting
    // it through would also allow `i64::MIN` to be negated and overflow.
    if matches!(digits.as_bytes().first(), Some(b'+' | b'-')) {
        return None;
    }
    let magnitude = i64::from_str_radix(digits, radix).ok()?;
    // `magnitude` is non-negative here, so negating it cannot overflow.
    Some(if negative { -magnitude } else { magnitude })
}
// Unit tests: each case pins the exact byte sequence produced for one
// instruction text.
#[cfg(test)]
mod tests {
use super::*;
// `test r32,r32` with the same register on both sides.
#[test]
fn test_reg_reg_same_register() {
assert_eq!(encode_cmp_or_test("test eax,eax"), Some(vec![0x85, 0xc0]));
assert_eq!(encode_cmp_or_test("test esi,esi"), Some(vec![0x85, 0xf6]));
assert_eq!(encode_cmp_or_test("test edi,edi"), Some(vec![0x85, 0xff]));
assert_eq!(encode_cmp_or_test("test ecx,ecx"), Some(vec![0x85, 0xc9]));
assert_eq!(encode_cmp_or_test("test ebp,ebp"), Some(vec![0x85, 0xed]));
}
// `test r32,r32` with two different registers: the first operand lands in
// the ModR/M `rm` field, the second in `reg`.
#[test]
fn test_reg_reg_different_registers() {
assert_eq!(encode_cmp_or_test("test ebx,eax"), Some(vec![0x85, 0xc3]));
}
// Small immediates use the one-byte-immediate form, even for `eax`.
#[test]
fn cmp_reg_imm8() {
assert_eq!(
encode_cmp_or_test("cmp esi,1"),
Some(vec![0x83, 0xfe, 0x01])
);
assert_eq!(
encode_cmp_or_test("cmp esi,2"),
Some(vec![0x83, 0xfe, 0x02])
);
assert_eq!(
encode_cmp_or_test("cmp eax,0"),
Some(vec![0x83, 0xf8, 0x00])
);
}
// Larger immediates: `eax` gets the short `0x3D` form, other registers the
// `0x81` form.
#[test]
fn cmp_reg_imm32() {
assert_eq!(
encode_cmp_or_test("cmp eax,0x10000"),
Some(vec![0x3d, 0x00, 0x00, 0x01, 0x00])
);
assert_eq!(
encode_cmp_or_test("cmp ebx,0x10000"),
Some(vec![0x81, 0xfb, 0x00, 0x00, 0x01, 0x00])
);
}
// Absolute address operand written with an `h`-suffixed hex literal.
#[test]
fn cmp_mem_absolute_imm() {
assert_eq!(
encode_cmp_or_test("cmp dword ptr [1C26D368h],7"),
Some(vec![0x83, 0x3d, 0x68, 0xd3, 0x26, 0x1c, 0x07])
);
}
// Memory on the left, register on the right, with a four-byte displacement.
#[test]
fn cmp_mem_with_indexed_base() {
assert_eq!(
encode_cmp_or_test("cmp [esi+4E0h],ebx"),
Some(vec![0x39, 0x9e, 0xe0, 0x04, 0x00, 0x00])
);
}
// Register on the left, memory on the right, with a negative one-byte
// displacement.
#[test]
fn cmp_reg_mem() {
assert_eq!(
encode_cmp_or_test("cmp eax,[ebp-14h]"),
Some(vec![0x3b, 0x45, 0xec])
);
}
// `test r8,r8`.
#[test]
fn test_reg8_reg8() {
assert_eq!(encode_cmp_or_test("test al,al"), Some(vec![0x84, 0xc0]));
assert_eq!(encode_cmp_or_test("test bl,bl"), Some(vec![0x84, 0xdb]));
}
// `cmp r8,r8`.
#[test]
fn cmp_reg8_reg8() {
assert_eq!(encode_cmp_or_test("cmp dl,bl"), Some(vec![0x3a, 0xd3]));
}
// `cmp r32,r32`.
#[test]
fn cmp_reg_reg() {
assert_eq!(encode_cmp_or_test("cmp eax,ebx"), Some(vec![0x3b, 0xc3]));
}
// A displacement of 0xA0 does not fit in a signed byte, so four displacement
// bytes are emitted.
#[test]
fn cmp_mem_imm_short_displacement() {
assert_eq!(
encode_cmp_or_test("cmp dword ptr [edi+0A0h],0"),
Some(vec![0x83, 0xbf, 0xa0, 0x00, 0x00, 0x00, 0x00])
);
}
// A displacement of -4 fits in a signed byte.
#[test]
fn cmp_mem_imm_small_displacement() {
assert_eq!(
encode_cmp_or_test("cmp [ebp-4],1"),
Some(vec![0x83, 0x7d, 0xfc, 0x01])
);
}
// Only the text before the first `;` is encoded.
#[test]
fn from_cond_text_strips_jcc() {
assert_eq!(
encode_head_from_cond_text("test esi,esi; jne short 18114h"),
Some(vec![0x85, 0xf6])
);
}
// Unknown mnemonics and unsupported operands yield `None`.
#[test]
fn unrecognised_returns_none() {
assert_eq!(encode_cmp_or_test("nop"), None);
assert_eq!(encode_cmp_or_test("test xmm0,xmm0"), None);
}
}