use std::string::{String, ToString};
use std::vec::Vec;
use std::{format, println};
use crate::{
AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, CodeSink, DeferredTarget, Fixed, Gpr, Inst,
KnownOffset, NonRspGpr, Registers, TrapCode, Xmm,
};
use arbitrary::{Arbitrary, Result, Unstructured};
use capstone::{Capstone, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, arch::x86};
/// Assemble `inst`, disassemble the bytes with Capstone, and check that the
/// disassembly agrees with `inst`'s own `Display` output.
///
/// Everything up to and including the first space of Capstone's text is
/// dropped before comparing (presumably the address prefix — confirm against
/// Capstone's `Display` format). The two strings are accepted if they are
/// identical, or if the trimmed Capstone text equals `fix_up(&actual)`.
///
/// # Panics
///
/// Panics (after printing diagnostics) when neither comparison succeeds, when
/// Capstone's output contains no space, or when any helper it calls panics.
pub fn roundtrip(inst: &Inst<FuzzRegs>) {
    let assembled = assemble(inst);
    let disassembled = disassemble(&assembled, inst);
    let expected = disassembled.split_once(' ').unwrap().1;
    let actual = inst.to_string();

    // `fix_up` is only evaluated when the direct comparison already failed.
    if expected == actual || expected.trim() == fix_up(&actual) {
        return;
    }

    println!("> {inst}");
    println!(" debug: {inst:x?}");
    println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
    println!(" expected (capstone): {expected}");
    println!(" actual (to_string): {actual}");
    assert_eq!(expected, &actual);
}
fn assemble(inst: &Inst<FuzzRegs>) -> Vec<u8> {
let mut sink = TestCodeSink::default();
inst.encode(&mut sink);
sink.patch_labels_as_if_they_referred_to_end();
sink.buf
}
/// In-memory `CodeSink` used by this module to collect an instruction's
/// encoded bytes.
#[derive(Default)]
struct TestCodeSink {
// Bytes emitted so far, in emission order.
buf: Vec<u8>,
// Value of `buf.len()` captured at each `use_target` call; these positions
// are rewritten later by `patch_labels_as_if_they_referred_to_end`.
offsets_using_label: Vec<usize>,
}
impl TestCodeSink {
    /// Rewrite every recorded label use so that it refers to the end of `buf`.
    ///
    /// For each recorded offset, the 4 bytes starting there are read as a
    /// little-endian `i32`, the distance from the end of that 4-byte field to
    /// the end of the buffer is added, and the sum is written back in place.
    ///
    /// # Panics
    ///
    /// Panics if the buffer length or an offset does not fit in an `i32`, or
    /// if fewer than 4 bytes follow a recorded offset.
    fn patch_labels_as_if_they_referred_to_end(&mut self) {
        let end = i32::try_from(self.buf.len()).unwrap();
        for &start in &self.offsets_using_label {
            let field = self.buf[start..].first_chunk_mut::<4>().unwrap();
            // The displacement is measured from the byte just past the field.
            let after_field = i32::try_from(start).unwrap() + 4;
            let distance_to_end = end - after_field;
            let addend = i32::from_le_bytes(*field);
            *field = (addend + distance_to_end).to_le_bytes();
        }
    }
}
/// `CodeSink` implementation that appends everything to `buf` in
/// little-endian byte order and remembers where label targets were used.
impl CodeSink for TestCodeSink {
    /// Append a single byte.
    fn put1(&mut self, v: u8) {
        self.buf.push(v);
    }

    /// Append `v` as 2 little-endian bytes.
    fn put2(&mut self, v: u16) {
        self.buf.extend(v.to_le_bytes());
    }

    /// Append `v` as 4 little-endian bytes.
    fn put4(&mut self, v: u32) {
        self.buf.extend(v.to_le_bytes());
    }

    /// Append `v` as 8 little-endian bytes.
    fn put8(&mut self, v: u64) {
        self.buf.extend(v.to_le_bytes());
    }

    /// Trap codes are ignored by this sink.
    fn add_trap(&mut self, _: TrapCode) {}

    /// Record the current buffer length so the bytes at that position can be
    /// patched later; the target itself is not inspected.
    fn use_target(&mut self, _: DeferredTarget) {
        let here = self.buf.len();
        self.offsets_using_label.push(here);
    }

    /// Known offsets are not supported by this sink.
    ///
    /// # Panics
    ///
    /// Always panics.
    fn known_offset(&self, target: KnownOffset) -> i32 {
        panic!("unsupported known target {target:?}")
    }
}
/// Disassemble `assembled` with Capstone (x86, 64-bit mode, AT&T syntax) and
/// return the textual form of the single decoded instruction.
///
/// `original` is used only for diagnostics printed before a failing assertion.
///
/// # Panics
///
/// Panics if Capstone cannot be built or fails to disassemble, if the bytes
/// do not decode to exactly one instruction, or if that instruction's length
/// differs from `assembled.len()`.
fn disassemble(assembled: &[u8], original: &Inst<FuzzRegs>) -> String {
    let cs = Capstone::new()
        .x86()
        .mode(x86::ArchMode::Mode64)
        .syntax(x86::ArchSyntax::Att)
        .detail(true)
        .build()
        .expect("failed to create Capstone object");
    let insts = cs
        .disasm_all(assembled, 0x0)
        .expect("failed to disassemble");

    // Diagnostic header shared by both failure paths below.
    let dump_input = || {
        println!("> {original}");
        println!(" debug: {original:x?}");
        println!(" assembled: {}", pretty_print_hexadecimal(assembled));
    };

    let decoded = insts.len();
    if decoded != 1 {
        dump_input();
        assert_eq!(decoded, 1, "not a single instruction");
    }

    let inst = insts.first().expect("at least one instruction");
    let consumed = inst.len();
    if assembled.len() != consumed {
        dump_input();
        println!(
            " capstone-assembled: {}",
            pretty_print_hexadecimal(inst.bytes())
        );
        assert_eq!(assembled.len(), consumed, "extra bytes not disassembled");
    }

    inst.to_string()
}
/// Render `hex` as a contiguous string of two-digit, upper-case hexadecimal
/// bytes with no separators (e.g. `[0x0f, 0xab]` becomes `"0FAB"`).
fn pretty_print_hexadecimal(hex: &[u8]) -> String {
    use core::fmt::Write;
    // Two output characters per input byte.
    let rendered = String::with_capacity(hex.len() * 2);
    hex.iter().fold(rendered, |mut out, byte| {
        write!(out, "{byte:02X}").unwrap();
        out
    })
}
/// Parse `$hex` as a hexadecimal `$from`, reinterpret it as the signed type
/// `$to` with an `as` cast, and render it as a signed hexadecimal `String`.
///
/// Negative values get a leading `-`. Values strictly between -10 and 10 are
/// printed without the `0x` prefix; everything else is printed with it.
/// Panics if `$hex` is not valid hexadecimal for `$from`.
macro_rules! hex_print_signed_imm {
    ($hex:expr, $from:ty => $to:ty) => {{
        let imm = <$from>::from_str_radix($hex, 16).unwrap() as $to;
        let sign = if imm < 0 { "-" } else { "" };
        // `MIN` has no positive counterpart; formatting `MIN` itself with
        // `{:x}` prints its bit pattern, which is the magnitude we want.
        let magnitude = imm.checked_abs().unwrap_or(<$to>::MIN);
        if -10 < imm && imm < 10 {
            format!("{}{:x}", sign, magnitude)
        } else {
            format!("{}0x{:x}", sign, magnitude)
        }
    }};
}
/// Rewrite the first `$`-prefixed immediate in `dis`.
///
/// If `dis` contains no `$`, it is returned unchanged (borrowed). Otherwise
/// an optional `-` and an optional `0x` following the `$` are skipped, the
/// run of hex digits after them is parsed, and it is replaced by:
///
/// * the value as an unsigned decimal `u64` when `dis` starts with `mov`;
/// * otherwise the value reinterpreted as a signed integer whose width is
///   chosen from the number of hex digits (1–2, 4, 8 or 16), printed via
///   `hex_print_signed_imm!`.
///
/// # Panics
///
/// Panics on any other digit count in the non-`mov` case, or if the digits
/// cannot be parsed.
fn replace_signed_immediates(dis: &str) -> alloc::borrow::Cow<'_, str> {
    let Some(dollar) = dis.find('$') else {
        return dis.into();
    };

    // Keep the `$` in the prefix, then drop an optional sign and `0x`.
    let (prefix, tail) = dis.split_at(dollar + 1);
    let (_, tail) = chomp("-", tail);
    let (_, tail) = chomp("0x", tail);

    // Everything up to the first non-hex character is the immediate.
    let digits = tail.chars().take_while(char::is_ascii_hexdigit).count();
    let (hex, suffix) = tail.split_at(digits);

    let simm = if dis.starts_with("mov") {
        u64::from_str_radix(hex, 16).unwrap().to_string()
    } else {
        match hex.len() {
            1 | 2 => hex_print_signed_imm!(hex, u8 => i8),
            4 => hex_print_signed_imm!(hex, u16 => i16),
            8 => hex_print_signed_imm!(hex, u32 => i32),
            16 => hex_print_signed_imm!(hex, u64 => i64),
            _ => panic!("unexpected length for hex: {hex}"),
        }
    };

    format!("{prefix}{simm}{suffix}").into()
}
/// Split a leading `pat` off `s`.
///
/// Returns `(matched, rest)`: when `s` starts with `pat`, `matched` is that
/// leading slice of `s` and `rest` is what follows; otherwise `matched` is
/// empty and `rest` is all of `s`.
fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) {
    match s.strip_prefix(pat) {
        Some(rest) => (&s[..pat.len()], rest),
        None => ("", s),
    }
}
/// Checks `replace_signed_immediates` on sign-extended 32- and 64-bit
/// immediates, a positive immediate that is left as-is, and a `mov`
/// immediate that is rendered in unsigned decimal.
#[test]
fn replace() {
    let cases = [
        ("andl $0xffffff9a, %r11d", "andl $-0x66, %r11d"),
        (
            "xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)",
            "xorq $-0x44, 0x7f139ecc(%r9)",
        ),
        (
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
        ),
        (
            "movq $0xffffffff864ae103, %rsi",
            "movq $18446744071667638531, %rsi",
        ),
    ];
    for (input, expected) in cases {
        assert_eq!(replace_signed_immediates(input), expected);
    }
}
/// Drop everything from the first `;` onward and trim the remainder.
///
/// When `dis` contains no `;`, it is returned untouched (not trimmed).
fn remove_after_semicolon(dis: &str) -> &str {
    dis.split_once(';')
        .map_or(dis, |(before, _comment)| before.trim())
}
/// Despite its name, this exercises `remove_after_semicolon`: the trailing
/// `;; implicit: ...` annotation and the whitespace before it are removed.
#[test]
fn remove_after_parenthesis_test() {
    let annotated = "imulb 0x7658eddd(%rcx) ;; implicit: %ax";
    let stripped = remove_after_semicolon(annotated);
    assert_eq!(stripped, "imulb 0x7658eddd(%rcx)");
}
/// Normalize a pretty-printed instruction for comparison: first strip any
/// `;`-introduced trailer, then rewrite the first `$` immediate.
fn fix_up(dis: &str) -> alloc::borrow::Cow<'_, str> {
    replace_signed_immediates(remove_after_semicolon(dis))
}
/// Unit type used as the register parameter of `Inst` in this module's
/// round-trip checks; its `Registers` impl uses `FuzzReg` for every slot.
#[derive(Clone, Arbitrary, Debug)]
pub struct FuzzRegs;
/// Every register slot — GPR or XMM, read, written, or both — is represented
/// by the same `FuzzReg` type.
impl Registers for FuzzRegs {
type ReadGpr = FuzzReg;
type ReadWriteGpr = FuzzReg;
type WriteGpr = FuzzReg;
type ReadXmm = FuzzReg;
type ReadWriteXmm = FuzzReg;
type WriteXmm = FuzzReg;
}
/// Register stand-in that stores only a raw `u8` encoding; its `Arbitrary`
/// impl draws that encoding from `0..=15`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct FuzzReg(u8);
impl<'a> Arbitrary<'a> for FuzzReg {
    /// Draw an encoding in `0..=15` from `u` and wrap it.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        u.int_in_range(0..=15).map(Self)
    }
}
impl AsReg for FuzzReg {
fn new(enc: u8) -> Self {
Self(enc)
}
fn enc(&self) -> u8 {
self.0
}
}
impl Arbitrary<'_> for AmodeOffset {
    /// Generate an offset from `u` in three draws, in this order:
    ///
    /// 1. a `bool` choosing between an `i8`-sized and a full `i32` base;
    /// 2. the base value itself (an `i8` is widened to `i32`);
    /// 3. a selector in `0..=5`: `0` yields `AmodeOffset::ZERO`, any other
    ///    value `n` yields the base shifted left by `n - 1` bits.
    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
        let narrow: bool = u.arbitrary()?;
        let base = if narrow {
            i32::from(u.arbitrary::<i8>()?)
        } else {
            u.arbitrary::<i32>()?
        };
        let offset = match u.int_in_range(0..=5)? {
            0 => AmodeOffset::ZERO,
            selector => AmodeOffset::new(base << (selector - 1)),
        };
        Ok(offset)
    }
}
impl Arbitrary<'_> for AmodeOffsetPlusKnownOffset {
    /// Generate an arbitrary `simm32` and never attach a known offset.
    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
        let simm32 = AmodeOffset::arbitrary(u)?;
        Ok(Self {
            simm32,
            offset: None,
        })
    }
}
impl<R: AsReg, const E: u8> Arbitrary<'_> for Fixed<R, E> {
    /// Always build the value from the const parameter `E`; the input
    /// `Unstructured` is not read.
    fn arbitrary(_: &mut Unstructured<'_>) -> Result<Self> {
        let fixed = Self::new(E);
        Ok(fixed)
    }
}
impl<R: AsReg> Arbitrary<'_> for NonRspGpr<R> {
    /// Choose one encoding from a fixed list of fifteen GPR constants — the
    /// list does not include `RSP` — and wrap it.
    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
        use crate::gpr::enc::*;
        let candidates = [
            RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
        ];
        let enc = *u.choose(&candidates)?;
        Ok(Self::new(R::new(enc)))
    }
}
impl<'a, R: AsReg> Arbitrary<'a> for Gpr<R> {
    /// Draw an encoding in `0..=15` and wrap it as a `Gpr`.
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        let enc: u8 = u.int_in_range(0..=15)?;
        let reg = R::new(enc);
        Ok(Self(reg))
    }
}
impl<'a, R: AsReg> Arbitrary<'a> for Xmm<R> {
    /// Draw an encoding in `0..=15` and wrap it as an `Xmm`.
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        let enc: u8 = u.int_in_range(0..=15)?;
        let reg = R::new(enc);
        Ok(Self(reg))
    }
}
/// Shorthand bound: a `Registers` implementation whose six associated
/// register types each implement `Arbitrary` for every lifetime.
///
/// The trait has no items of its own.
pub trait RegistersArbitrary:
Registers<
ReadGpr: for<'a> Arbitrary<'a>,
ReadWriteGpr: for<'a> Arbitrary<'a>,
WriteGpr: for<'a> Arbitrary<'a>,
ReadXmm: for<'a> Arbitrary<'a>,
ReadWriteXmm: for<'a> Arbitrary<'a>,
WriteXmm: for<'a> Arbitrary<'a>,
>
{
}
/// Blanket implementation: every `Registers` type whose six associated
/// register types implement `Arbitrary` for every lifetime is automatically
/// `RegistersArbitrary`.
impl<R> RegistersArbitrary for R
where
R: Registers,
R::ReadGpr: for<'a> Arbitrary<'a>,
R::ReadWriteGpr: for<'a> Arbitrary<'a>,
R::WriteGpr: for<'a> Arbitrary<'a>,
R::ReadXmm: for<'a> Arbitrary<'a>,
R::ReadWriteXmm: for<'a> Arbitrary<'a>,
R::WriteXmm: for<'a> Arbitrary<'a>,
{
}
#[cfg(test)]
mod test {
    use super::*;
    use arbtest::arbtest;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Round-trip arbitrary instructions for a budget of 1000 ms, printing a
    /// running index and each instruction after it passes.
    #[test]
    fn smoke() {
        let count = AtomicUsize::new(0);
        arbtest(|u| {
            let inst: Inst<FuzzRegs> = u.arbitrary()?;
            roundtrip(&inst);
            let index = count.fetch_add(1, Ordering::SeqCst);
            println!("#{}: {inst}", index);
            Ok(())
        })
        .budget_ms(1_000);
    }

    /// Round-trip `callq_d` for every immediate in `-500..500`.
    #[test]
    fn callq() {
        for i in -500..500 {
            println!("immediate: {i}");
            let inst: Inst<FuzzRegs> = crate::inst::callq_d::new(i).into();
            roundtrip(&inst);
        }
    }
}