#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_possible_wrap)]
#![allow(clippy::cast_sign_loss)]
use std::collections::BTreeSet;
use ud_core::VAddr;
use ud_ir::{ArchInsn, BasicBlock, Function, Terminator};
mod assemble;
mod codec;
pub use assemble::{
assemble_bpf, assemble_bpf_ifblock_cond, assemble_bpf_ja, desymbolize_bpf_text, AssembleError,
};
pub use codec::{register, BpfCodec, EM_BPF, EM_SBF};
/// Width in bytes of one instruction slot. `decode` rejects buffers whose length is not a
/// multiple of this value, and all address arithmetic in this file steps by it.
pub const INSN_SIZE: usize = 8;
/// Instruction-set flavour passed to the classifier and the formatter.
///
/// Within this file the variant changes two things: how opcode `0x8d` is classified
/// (`CallReg` for the two `Sbf*` variants, `Call` for `Linux`), and which ALU mnemonics are
/// printed (`Sbfv2` prints `udiv`/`urem` instead of `div`/`mod` and also names `sdiv`/`srem`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BpfVariant {
// NOTE(review): presumably the Linux kernel eBPF encoding — confirm against the codec module.
Linux,
// NOTE(review): presumably SBF version 1; treated like `Sbfv2` for opcode `0x8d` only.
Sbfv1,
// NOTE(review): presumably SBF version 2; the only variant with the `udiv`/`urem`/`sdiv`/`srem`
// mnemonics in `format_alu`.
Sbfv2,
}
/// Errors for BPF byte-stream decoding.
///
/// Of these, the `decode` function in this file only ever returns `Misaligned`; a malformed
/// `lddw` pair is reported there as an `InsnKind::Unknown` record instead of an error. The two
/// `Lddw*` variants are not constructed anywhere in the visible code.
// NOTE(review): `LddwTruncated` / `LddwBadContinuation` may be produced by sibling modules
// (e.g. the codec) — confirm before removing.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The input length is not a whole number of `INSN_SIZE`-byte slots.
#[error(
"byte buffer length {len} is not a multiple of {INSN_SIZE} (BPF slots are fixed-width)"
)]
Misaligned { len: usize },
/// An `lddw` first slot at `offset` has no following slot.
#[error("lddw at offset {offset:#x} truncated — second slot missing")]
LddwTruncated { offset: usize },
/// The slot following an `lddw` at `offset` carries a non-zero opcode byte.
#[error("lddw at offset {offset:#x} continuation slot has non-zero opcode {opcode:#x}")]
LddwBadContinuation { offset: usize, opcode: u8 },
}
/// Result alias whose error type defaults to this module's [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Coarse instruction category assigned by `decode` and `classify_opcode`.
///
/// "Class" below means the low three bits of the opcode byte; "op nibble" means its high four
/// bits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InsnKind {
/// Class `0x04`, except op nibble `0xd` (which is `Endian`).
Alu32,
/// Class `0x07`, except op nibble `0xd` (which is `Endian`).
Alu64,
/// Class `0x05` with op nibble `0x0`.
Jmp,
/// Any class `0x05` opcode not claimed by `Jmp`, `Call`, `CallReg` or `Exit`.
JmpCond,
/// Every class `0x06` opcode.
JmpCond32,
/// Opcode `0x85`, and opcode `0x8d` under `BpfVariant::Linux`.
Call,
/// Opcode `0x8d` under `BpfVariant::Sbfv1` / `BpfVariant::Sbfv2`.
CallReg,
/// Opcode `0x95`.
Exit,
/// Class `0x00` or `0x01` (other than an `lddw` pair recognised by `decode`).
Load,
/// Class `0x02` or `0x03`.
Store,
/// First slot of an opcode-`0x18` instruction whose next slot has a zero opcode byte.
Lddw,
/// The second slot of such a pair; it carries the upper 32 bits of the immediate.
LddwSecondHalf,
/// Op nibble `0xd` in class `0x04` or `0x07`.
Endian,
/// Used by `decode` for an opcode-`0x18` slot that lacks a well-formed continuation slot.
Unknown,
}
/// One decoded instruction slot, as produced by `decode`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedInsn {
/// Address of the slot: the `start` passed to `decode` plus the slot's byte offset.
pub addr: VAddr,
/// The slot's raw bytes, unchanged.
pub bytes: [u8; INSN_SIZE],
/// Category assigned while decoding.
pub kind: InsnKind,
/// Byte 0 of the slot.
pub opcode: u8,
/// Low nibble of byte 1.
pub dst: u8,
/// High nibble of byte 1.
pub src: u8,
/// Bytes 2..4 read as a little-endian `i16`.
pub offset: i16,
/// Bytes 4..8 read as a little-endian `i32`.
pub imm: i32,
/// Full 64-bit immediate; `decode` sets it only on the first slot of an `lddw` pair
/// (low half from this slot's `imm`, high half from the continuation slot).
pub imm64: Option<u64>,
}
impl DecodedInsn {
    /// Reassembles the slot's raw bytes into a single little-endian `u64`.
    #[must_use]
    pub fn raw_u64(&self) -> u64 {
        let Self { bytes, .. } = self;
        u64::from_le_bytes(*bytes)
    }
}
impl ArchInsn for DecodedInsn {
    /// Address of this slot.
    fn addr(&self) -> VAddr {
        let Self { addr, .. } = self;
        *addr
    }

    /// The slot's raw bytes exactly as they were read by the decoder.
    fn original_bytes(&self) -> &[u8] {
        &self.bytes[..]
    }
}
pub fn decode(bytes: &[u8], start: u64, variant: BpfVariant) -> Result<Vec<DecodedInsn>> {
if bytes.len() % INSN_SIZE != 0 {
return Err(Error::Misaligned { len: bytes.len() });
}
let mut out = Vec::with_capacity(bytes.len() / INSN_SIZE);
let mut i = 0usize;
while i < bytes.len() {
let slot = &bytes[i..i + INSN_SIZE];
let raw: [u8; INSN_SIZE] = slot.try_into().expect("INSN_SIZE chunk");
let addr = start.saturating_add(i as u64);
let opcode = raw[0];
let dst = raw[1] & 0x0f;
let src = (raw[1] >> 4) & 0x0f;
let offset = i16::from_le_bytes([raw[2], raw[3]]);
let imm = i32::from_le_bytes([raw[4], raw[5], raw[6], raw[7]]);
if opcode == 0x18 {
let has_well_formed_pair =
i + 2 * INSN_SIZE <= bytes.len() && bytes[i + INSN_SIZE] == 0;
if !has_well_formed_pair {
out.push(DecodedInsn {
addr: VAddr(addr),
bytes: raw,
kind: InsnKind::Unknown,
opcode,
dst,
src,
offset,
imm,
imm64: None,
});
i += INSN_SIZE;
continue;
}
let cont = &bytes[i + INSN_SIZE..i + 2 * INSN_SIZE];
let imm_hi = u32::from_le_bytes([cont[4], cont[5], cont[6], cont[7]]);
let imm_lo = imm as u32;
let imm64 = (u64::from(imm_hi) << 32) | u64::from(imm_lo);
out.push(DecodedInsn {
addr: VAddr(addr),
bytes: raw,
kind: InsnKind::Lddw,
opcode,
dst,
src,
offset,
imm,
imm64: Some(imm64),
});
let cont_raw: [u8; INSN_SIZE] = cont.try_into().expect("INSN_SIZE chunk");
let cont_addr = addr.wrapping_add(INSN_SIZE as u64);
out.push(DecodedInsn {
addr: VAddr(cont_addr),
bytes: cont_raw,
kind: InsnKind::LddwSecondHalf,
opcode: 0,
dst: cont_raw[1] & 0x0f,
src: (cont_raw[1] >> 4) & 0x0f,
offset: i16::from_le_bytes([cont_raw[2], cont_raw[3]]),
imm: i32::from_le_bytes([cont_raw[4], cont_raw[5], cont_raw[6], cont_raw[7]]),
imm64: None,
});
i += 2 * INSN_SIZE;
continue;
}
let kind = classify_opcode(opcode, variant);
out.push(DecodedInsn {
addr: VAddr(addr),
bytes: raw,
kind,
opcode,
dst,
src,
offset,
imm,
imm64: None,
});
i += INSN_SIZE;
}
Ok(out)
}
/// Re-derives an instruction's category from its opcode byte alone.
///
/// The stored `kind` is not consulted, so an `lddw` slot is reported by its opcode class
/// rather than as `Lddw` / `LddwSecondHalf`.
#[must_use]
pub fn classify(insn: &DecodedInsn, variant: BpfVariant) -> InsnKind {
    let opcode_byte = insn.opcode;
    classify_opcode(opcode_byte, variant)
}
/// Maps an opcode byte to its [`InsnKind`] using the class in the low three bits.
///
/// Both ALU classes (`0x04`, `0x07`) report `Endian` when the high nibble is `0xd`; the jump
/// classes are delegated to `classify_jmp` / `classify_jmp32`.
fn classify_opcode(opcode: u8, variant: BpfVariant) -> InsnKind {
    let endian_op = opcode >> 4 == 0xd;
    match opcode & 0x07 {
        0x00 | 0x01 => InsnKind::Load,
        0x02 | 0x03 => InsnKind::Store,
        0x04 | 0x07 if endian_op => InsnKind::Endian,
        0x04 => InsnKind::Alu32,
        0x05 => classify_jmp(opcode, variant),
        0x06 => classify_jmp32(opcode),
        0x07 => InsnKind::Alu64,
        // A three-bit class cannot take any other value; the arm only satisfies exhaustiveness.
        _ => InsnKind::Unknown,
    }
}
/// Classifies an opcode of the 64-bit jump class.
///
/// Op nibble `0x0` is an unconditional jump, `0x85` is a call, `0x95` is exit, and `0x8d` is a
/// register call on the SBF variants but a plain call on `Linux`. Everything else is treated
/// as a conditional jump.
fn classify_jmp(opcode: u8, variant: BpfVariant) -> InsnKind {
    let sbf = matches!(variant, BpfVariant::Sbfv1 | BpfVariant::Sbfv2);
    match (opcode >> 4, opcode) {
        (0x0, _) => InsnKind::Jmp,
        (0x8, 0x8d) if sbf => InsnKind::CallReg,
        (0x8, 0x85 | 0x8d) => InsnKind::Call,
        (0x9, 0x95) => InsnKind::Exit,
        _ => InsnKind::JmpCond,
    }
}
/// Classifies an opcode of the 32-bit jump class; every such opcode is a conditional jump,
/// so the opcode itself is not inspected.
fn classify_jmp32(_opcode: u8) -> InsnKind {
    InsnKind::JmpCond32
}
/// Computes a branch destination from the instruction's 16-bit `offset` field.
///
/// The offset counts slots relative to the slot after the branch, so the destination is
/// `addr + (offset + 1) * INSN_SIZE`, evaluated with wrapping arithmetic.
#[must_use]
pub fn jump_target(insn: &DecodedInsn) -> u64 {
    // Widening the 16-bit offset to i64 first means neither the `+ 1` nor the scaling can
    // overflow.
    let slots_ahead = i64::from(insn.offset) + 1;
    let byte_delta = slots_ahead.wrapping_mul(INSN_SIZE as i64);
    insn.addr.0.wrapping_add(byte_delta as u64)
}
/// Computes a call destination from the instruction's 32-bit `imm` field.
///
/// The immediate counts slots relative to the slot after the call, so the destination is
/// `addr + (imm + 1) * INSN_SIZE`, evaluated with wrapping arithmetic.
#[must_use]
pub fn call_target(insn: &DecodedInsn) -> u64 {
    // Widening the 32-bit immediate to i64 first means neither the `+ 1` nor the scaling can
    // overflow.
    let slots_ahead = i64::from(insn.imm) + 1;
    let byte_delta = slots_ahead.wrapping_mul(INSN_SIZE as i64);
    insn.addr.0.wrapping_add(byte_delta as u64)
}
/// Groups a decoded instruction run into basic blocks and wraps them in a [`Function`].
///
/// The function address is the first instruction's address; an empty slice yields
/// `VAddr(0)` and no blocks. A new block begins at the first instruction, at every
/// `Jmp` / `JmpCond` / `JmpCond32` target that falls inside the run, and at the slot directly
/// after any of those branches or an `Exit`. Each block's terminator is derived from its
/// final instruction by `block_terminator`.
#[must_use]
pub fn lift_function(name: String, insns: &[DecodedInsn]) -> Function<DecodedInsn> {
    let (Some(first), Some(last)) = (insns.first(), insns.last()) else {
        return Function {
            addr: VAddr(0),
            name,
            blocks: Vec::new(),
        };
    };
    let addr = first.addr;
    let fn_start = addr.0;
    // One past the last slot; branch targets at or beyond this are outside the function.
    let fn_end = last.addr.0.wrapping_add(INSN_SIZE as u64);

    // Pass 1: collect every address at which a block must start.
    let mut boundaries: BTreeSet<u64> = BTreeSet::from([fn_start]);
    for insn in insns {
        let is_branch = matches!(
            insn.kind,
            InsnKind::Jmp | InsnKind::JmpCond | InsnKind::JmpCond32
        );
        if is_branch {
            let target = jump_target(insn);
            if (fn_start..fn_end).contains(&target) {
                boundaries.insert(target);
            }
        }
        // Whatever follows a branch or an exit starts a fresh block.
        if is_branch || matches!(insn.kind, InsnKind::Exit) {
            let next = insn.addr.0.wrapping_add(INSN_SIZE as u64);
            if next < fn_end {
                boundaries.insert(next);
            }
        }
    }

    // Pass 2: walk the instructions in order, closing the open block at each boundary.
    let mut blocks: Vec<BasicBlock<DecodedInsn>> = Vec::new();
    let mut current: Vec<DecodedInsn> = Vec::new();
    let mut current_addr = fn_start;
    for insn in insns {
        if !current.is_empty() && boundaries.contains(&insn.addr.0) {
            let terminator = block_terminator(&current);
            blocks.push(BasicBlock {
                addr: VAddr(current_addr),
                insns: std::mem::take(&mut current),
                terminator,
            });
            current_addr = insn.addr.0;
        }
        current.push(insn.clone());
    }
    if !current.is_empty() {
        let terminator = block_terminator(&current);
        blocks.push(BasicBlock {
            addr: VAddr(current_addr),
            insns: current,
            terminator,
        });
    }

    Function { addr, name, blocks }
}
/// Derives a block's [`Terminator`] from its final instruction.
///
/// An empty slice, or a final instruction that is not a jump, exit or register call, yields
/// `Terminator::Fallthrough`.
fn block_terminator(insns: &[DecodedInsn]) -> Terminator {
    insns.last().map_or(Terminator::Fallthrough, |tail| {
        let destination = || VAddr(jump_target(tail));
        match tail.kind {
            InsnKind::Jmp => Terminator::UnconditionalBranch {
                target: destination(),
            },
            InsnKind::JmpCond | InsnKind::JmpCond32 => Terminator::ConditionalBranch {
                taken: destination(),
                fallthrough: VAddr(tail.addr.0.wrapping_add(INSN_SIZE as u64)),
            },
            InsnKind::Exit => Terminator::Return,
            InsnKind::CallReg => Terminator::IndirectBranch,
            _ => Terminator::Fallthrough,
        }
    })
}
/// Renders one decoded instruction as text.
///
/// An `LddwSecondHalf` record is printed as a `<lddw-cont …>` placeholder showing its
/// immediate; every other record is dispatched to a per-class formatter by the low three
/// bits of the opcode.
#[must_use]
pub fn format_insn(insn: &DecodedInsn, variant: BpfVariant) -> String {
    if insn.kind == InsnKind::LddwSecondHalf {
        return format!("<lddw-cont 0x{:08x}>", insn.imm as u32);
    }
    match insn.opcode & 0x07 {
        0x00 | 0x01 => format_ld(insn),
        0x02 | 0x03 => format_st(insn),
        class @ (0x04 | 0x07) => format_alu(insn, class == 0x07, variant),
        class @ (0x05 | 0x06) => format_jmp(insn, class == 0x06, variant),
        // A three-bit class cannot take any other value; kept for exhaustiveness.
        _ => format!("<bpf 0x{:016x}>", insn.raw_u64()),
    }
}
/// Renders a load-class instruction.
///
/// Handles, in order: `lddw` (opcode `0x18`, using `imm64` when present and the zero-extended
/// 32-bit immediate otherwise), a bare zero-opcode slot (printed as an `lddw` continuation),
/// the packet-load opcodes listed below, and finally the generic `ldx` register+offset form.
// NOTE(review): 0x40/0x48/0x50 are printed with the `ld_abs_` prefix as well; those encodings
// look like the indirect (BPF_IND) packet loads, and 0x58 is not in the list. Left unchanged
// because the assembler module may rely on this text — confirm intent.
fn format_ld(insn: &DecodedInsn) -> String {
    let imm32 = insn.imm as u32;
    match insn.opcode {
        0x18 => {
            let value = insn.imm64.unwrap_or_else(|| u64::from(imm32));
            format!("lddw r{}, 0x{:x}", insn.dst, value)
        }
        0x00 => format!("<lddw-cont 0x{imm32:08x}>"),
        op @ (0x20 | 0x28 | 0x30 | 0x38 | 0x40 | 0x48 | 0x50) => {
            let width = size_letter(op);
            format!("ld_abs_{width} r0, 0x{imm32:x}")
        }
        op => {
            let width = size_letter(op);
            let disp = format_offset(insn.offset);
            format!("ldx{width} r{}, [r{}{disp}]", insn.dst, insn.src)
        }
    }
}
/// Renders a store-class instruction: `st` with an immediate operand for class `0x02`,
/// `stx` with a source register for anything else routed here.
fn format_st(insn: &DecodedInsn) -> String {
    let width = size_letter(insn.opcode);
    let disp = format_offset(insn.offset);
    let base = insn.dst;
    match insn.opcode & 0x07 {
        0x02 => format!("st{width} [r{base}{disp}], 0x{:x}", insn.imm as u32),
        _ => format!("stx{width} [r{base}{disp}], r{}", insn.src),
    }
}
/// Returns the access-width suffix encoded in bits 3–4 of a load/store opcode:
/// `w`, `h`, `b` or `dw` for field values 0, 1, 2 and 3 respectively.
fn size_letter(opcode: u8) -> &'static str {
    const SUFFIXES: [&str; 4] = ["w", "h", "b", "dw"];
    // Masking to two bits keeps the index inside the four-entry table.
    SUFFIXES[usize::from((opcode >> 3) & 0b11)]
}
/// Renders a memory-operand displacement as `" + 0x…"` or `" - 0x…"`; a zero displacement
/// renders as the empty string.
fn format_offset(offset: i16) -> String {
    if offset == 0 {
        return String::new();
    }
    let sign = if offset.is_negative() { '-' } else { '+' };
    // `unsigned_abs` also handles `i16::MIN`, whose magnitude does not fit in an `i16`.
    format!(" {sign} 0x{:x}", offset.unsigned_abs())
}
/// Renders an ALU instruction as `<mnemonic><32|64> rD, rS` or `<mnemonic><32|64> rD, 0x<imm>`.
///
/// `neg` prints only its destination register, op nibble `0xd` is handed to `format_endian`,
/// and the division/remainder mnemonics depend on `variant`. Unrecognised op nibbles use the
/// `<alu?>` placeholder mnemonic.
fn format_alu(insn: &DecodedInsn, alu64: bool, variant: BpfVariant) -> String {
    let width = if alu64 { "64" } else { "32" };
    let sbfv2 = matches!(variant, BpfVariant::Sbfv2);
    let mnemonic = match insn.opcode >> 4 {
        0x0 => "add",
        0x1 => "sub",
        0x2 => "mul",
        0x3 => {
            if sbfv2 {
                "udiv"
            } else {
                "div"
            }
        }
        0x4 => "or",
        0x5 => "and",
        0x6 => "lsh",
        0x7 => "rsh",
        0x8 => return format!("neg{width} r{}", insn.dst),
        0x9 => {
            if sbfv2 {
                "urem"
            } else {
                "mod"
            }
        }
        0xa => "xor",
        0xb => "mov",
        0xc => "arsh",
        0xd => return format_endian(insn),
        0xe if sbfv2 => "sdiv",
        0xf if sbfv2 => "srem",
        _ => "<alu?>",
    };
    // Bit 3 of the opcode selects a register source over an immediate.
    let uses_register = insn.opcode & 0x08 != 0;
    if uses_register {
        format!("{mnemonic}{width} r{}, r{}", insn.dst, insn.src)
    } else {
        format!("{mnemonic}{width} r{}, 0x{:x}", insn.dst, insn.imm as u32)
    }
}
/// Renders a byte-swap instruction as `le<imm> rD` or `be<imm> rD`; bit 3 of the opcode
/// selects `be`, and the immediate is printed in decimal.
fn format_endian(insn: &DecodedInsn) -> String {
    let prefix = match insn.opcode & 0x08 {
        0 => "le",
        _ => "be",
    };
    format!("{prefix}{} r{}", insn.imm, insn.dst)
}
/// Renders a jump-class instruction.
///
/// `ja` (opcode `0x05`, 64-bit class only), `call`, `callx` and `exit` have dedicated forms.
/// Everything else is printed as `<mnemonic>[32] rD, <rS | 0x<imm>>, <±offset>`, with the
/// `<jcc?>` placeholder for op nibbles that have no mnemonic here.
fn format_jmp(insn: &DecodedInsn, is_32: bool, _variant: BpfVariant) -> String {
    // Indexed by the opcode's high nibble.
    const MNEMONICS: [&str; 16] = [
        "<jcc?>", "jeq", "jgt", "jge", "jset", "jne", "jsgt", "jsge", "<jcc?>", "<jcc?>", "jlt",
        "jle", "jslt", "jsle", "<jcc?>", "<jcc?>",
    ];

    match insn.opcode {
        0x05 if !is_32 => return format!("ja {}", format_branch_offset(insn.offset)),
        0x85 => return format!("call 0x{:x}", insn.imm as u32),
        0x8d => return format!("callx r{}", insn.dst),
        0x95 => return "exit".into(),
        _ => {}
    }

    let mnemonic = MNEMONICS[usize::from(insn.opcode >> 4)];
    let width = if is_32 { "32" } else { "" };
    // Bit 3 of the opcode selects a register comparand over an immediate.
    let comparand = if insn.opcode & 0x08 == 0 {
        format!("0x{:x}", insn.imm as u32)
    } else {
        format!("r{}", insn.src)
    };
    let destination = format_branch_offset(insn.offset);
    format!("{mnemonic}{width} r{}, {comparand}, {destination}", insn.dst)
}
/// Renders a branch displacement as a signed hexadecimal slot count, e.g. `+0x2` or `-0x1`;
/// zero renders as `+0x0`.
fn format_branch_offset(offset: i16) -> String {
    let magnitude = offset.unsigned_abs();
    match offset.is_negative() {
        true => format!("-0x{magnitude:x}"),
        false => format!("+0x{magnitude:x}"),
    }
}
// Unit tests for the decoder, the classifier, the lifter and the formatter.
#[cfg(test)]
mod tests {
use super::*;
// Decodes a six-slot program and checks the kinds of selected slots plus a byte-exact
// round trip through `DecodedInsn::bytes`.
#[test]
fn decodes_fixture_filter() {
// Six 8-byte slots whose opcode bytes are 0x79, 0xb4, 0x15, 0x04, 0xbc and 0x95.
let bytes: Vec<u8> = vec![
0x79, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xbc, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ];
let insns = decode(&bytes, 0, BpfVariant::Linux).unwrap();
assert_eq!(insns.len(), 6);
assert_eq!(insns[0].kind, InsnKind::Load);
assert_eq!(insns[1].kind, InsnKind::Alu32);
assert_eq!(insns[2].kind, InsnKind::JmpCond);
assert_eq!(insns[5].kind, InsnKind::Exit);
// Concatenating every slot's raw bytes must reproduce the input exactly.
let mut reconstructed: Vec<u8> = Vec::with_capacity(bytes.len());
for i in &insns {
reconstructed.extend_from_slice(&i.bytes);
}
assert_eq!(reconstructed, bytes);
}
// A buffer that is not a multiple of `INSN_SIZE` is rejected with its length reported.
#[test]
fn rejects_misaligned_buffer() {
let bytes = [0u8; 7];
assert!(matches!(
decode(&bytes, 0, BpfVariant::Linux),
Err(Error::Misaligned { len: 7 })
));
}
// An opcode-0x18 slot followed by a zero-opcode slot decodes to an `Lddw` /
// `LddwSecondHalf` pair, with the two 32-bit immediates merged into `imm64`.
#[test]
fn lddw_pairs_two_slots() {
// First slot immediate: 0x12345678 (low half); second slot immediate: 0x90abcdef (high half).
let bytes: Vec<u8> = vec![
0x18, 0x01, 0x00, 0x00, 0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0xef, 0xcd,
0xab, 0x90,
];
let insns = decode(&bytes, 0, BpfVariant::Linux).unwrap();
assert_eq!(insns.len(), 2);
assert_eq!(insns[0].kind, InsnKind::Lddw);
assert_eq!(insns[0].imm64, Some(0x90ab_cdef_1234_5678));
assert_eq!(insns[1].kind, InsnKind::LddwSecondHalf);
assert_eq!(insns[1].bytes, [0, 0, 0, 0, 0xef, 0xcd, 0xab, 0x90]);
}
// A lone `exit` (opcode 0x95) lifts to a single block that ends in `Terminator::Return`.
#[test]
fn exit_drives_return_terminator() {
let bytes = [0x95, 0, 0, 0, 0, 0, 0, 0];
let insns = decode(&bytes, 0x100, BpfVariant::Linux).unwrap();
let f = lift_function("f".into(), &insns);
assert_eq!(f.blocks[0].terminator, Terminator::Return);
}
// Opcode 0x8d is a plain `Call` under `Linux` but a `CallReg` under `Sbfv1`.
#[test]
fn opcode_8d_classification_per_variant() {
let bytes = [0x8d, 0x30, 0, 0, 0, 0, 0, 0];
assert_eq!(
decode(&bytes, 0, BpfVariant::Linux).unwrap()[0].kind,
InsnKind::Call,
);
assert_eq!(
decode(&bytes, 0, BpfVariant::Sbfv1).unwrap()[0].kind,
InsnKind::CallReg,
);
}
// Checks the rendered text for one load, one 32-bit move and `exit`.
#[test]
fn formats_basic_ops() {
// Three slots whose opcode bytes are 0x79, 0xb4 and 0x95.
let bytes: Vec<u8> = vec![
0x79, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
let insns = decode(&bytes, 0, BpfVariant::Linux).unwrap();
assert_eq!(format_insn(&insns[0], BpfVariant::Linux), "ldxdw r1, [r1]");
assert_eq!(format_insn(&insns[1], BpfVariant::Linux), "mov32 r0, 0x0");
assert_eq!(format_insn(&insns[2], BpfVariant::Linux), "exit");
}
}