use crate::{
assemble_bpf, assemble_bpf_ifblock_cond, assemble_bpf_ja, desymbolize_bpf_text, BpfVariant,
INSN_SIZE,
};
use ud_arch_codec::{ArchCodec, ArchError, EncodeHints, SwitchSpec};
/// Codec for the BPF instruction family; this file implements [`ArchCodec`] for it.
///
/// The wrapped [`BpfVariant`] selects the flavour, which determines the
/// identifier reported by `name()`.
#[derive(Debug, Clone, Copy)]
pub struct BpfCodec(pub BpfVariant);
// Ready-made codec values, one per `BpfVariant` used in this file.
impl BpfCodec {
/// Codec wrapping [`BpfVariant::Linux`].
pub const LINUX: Self = Self(BpfVariant::Linux);
/// Codec wrapping [`BpfVariant::Sbfv1`].
pub const SBF_V1: Self = Self(BpfVariant::Sbfv1);
/// Codec wrapping [`BpfVariant::Sbfv2`].
pub const SBF_V2: Self = Self(BpfVariant::Sbfv2);
}
/// Converts a byte-addressed branch into a signed displacement counted in
/// instruction slots.
///
/// The displacement is measured from the slot that follows `source_ip`
/// (`source_ip + INSN_SIZE`) to `target`, in units of `INSN_SIZE` bytes.
///
/// # Errors
///
/// Returns [`ArchError::OutOfRange`] when the byte distance is not a multiple
/// of `INSN_SIZE`, or when the slot count does not fit in an `i16`.
fn slot_offset(source_ip: u64, target: u64) -> Result<i16, ArchError> {
    let slot_bytes = INSN_SIZE as i64;
    let fallthrough = source_ip.wrapping_add(INSN_SIZE as u64);
    // The u64 -> i64 reinterpretation is deliberate: combined with the
    // wrapping subtraction it yields the signed distance between the two
    // addresses even when they lie in the upper half of the address space.
    #[allow(clippy::cast_possible_wrap)]
    let delta = (target as i64).wrapping_sub(fallthrough as i64);
    match (delta / slot_bytes, delta % slot_bytes) {
        (slots, 0) => i16::try_from(slots).map_err(|_| {
            ArchError::OutOfRange(format!(
                "BPF branch displacement {slots} slots overflows i16 (max ±32768)"
            ))
        }),
        _ => Err(ArchError::OutOfRange(format!(
            "BPF branch displacement {delta} bytes is not slot-aligned"
        ))),
    }
}
impl ArchCodec for BpfCodec {
fn name(&self) -> &'static str {
match self.0 {
BpfVariant::Linux => "bpf-linux",
BpfVariant::Sbfv1 => "bpf-sbf-v1",
BpfVariant::Sbfv2 => "bpf-sbf-v2",
}
}
/// Assembles one line of BPF assembly text via `assemble_bpf`.
///
/// `_addr` is accepted for trait compatibility and is not used.
///
/// # Errors
///
/// Any assembler failure is stringified into [`ArchError::Assemble`].
fn assemble_one(&self, text: &str, _addr: u64) -> Result<Vec<u8>, ArchError> {
    match assemble_bpf(text) {
        Ok(bytes) => Ok(bytes),
        Err(err) => Err(ArchError::Assemble(err.to_string())),
    }
}
/// Runs `text` through `desymbolize_bpf_text` for the instruction at `addr`.
///
/// When the helper yields nothing, the input text is returned unchanged.
fn desymbolize(&self, text: &str, addr: u64) -> String {
    match desymbolize_bpf_text(text, addr, None) {
        Some(rewritten) => rewritten,
        None => text.to_string(),
    }
}
/// Encodes an unconditional jump from `source_ip` to `target`.
///
/// The slot displacement comes from `slot_offset` and is handed to
/// `assemble_bpf_ja`. `_hints` is not consulted.
///
/// # Errors
///
/// Propagates the range/alignment error from `slot_offset`; assembler
/// failures become [`ArchError::Assemble`].
fn encode_jump(
    &self,
    source_ip: u64,
    target: u64,
    _hints: EncodeHints,
) -> Result<Vec<u8>, ArchError> {
    slot_offset(source_ip, target).and_then(|slots| {
        assemble_bpf_ja(slots).map_err(|err| ArchError::Assemble(err.to_string()))
    })
}
fn encode_call(
&self,
source_ip: u64,
target: u64,
hints: EncodeHints,
) -> Result<Vec<u8>, ArchError> {
let slots = slot_offset(source_ip, target)?;
let imm32 = i32::from(slots);
let mnemonic = if hints.bpf_call_local.unwrap_or(false) {
"call_local"
} else {
"call_internal"
};
assemble_bpf(&format!("{mnemonic} {imm32}")).map_err(|e| ArchError::Assemble(e.to_string()))
}
/// Encodes a conditional jump from `source_ip` to `target` guarded by
/// `cond_text`.
///
/// The slot displacement comes from `slot_offset`; the condition text and
/// displacement are passed to `assemble_bpf_ifblock_cond`. `_hints` is not
/// consulted.
///
/// # Errors
///
/// Propagates the range/alignment error from `slot_offset`; assembler
/// failures become [`ArchError::Assemble`].
fn encode_cond_jump(
    &self,
    cond_text: &str,
    source_ip: u64,
    target: u64,
    _hints: EncodeHints,
) -> Result<Vec<u8>, ArchError> {
    let slots = slot_offset(source_ip, target)?;
    match assemble_bpf_ifblock_cond(cond_text, slots) {
        Ok(bytes) => Ok(bytes),
        Err(err) => Err(ArchError::Assemble(err.to_string())),
    }
}
/// Switch dispatch is not implemented by this codec.
///
/// # Errors
///
/// Always returns [`ArchError::Unsupported`] carrying this codec's name and
/// the operation label `"switch_dispatch"`.
fn encode_switch_dispatch(&self, _spec: &SwitchSpec) -> Result<Vec<u8>, ArchError> {
    let arch = self.name();
    let operation = "switch_dispatch";
    Err(ArchError::Unsupported { arch, operation })
}
/// Size in bytes of an encoded unconditional jump.
///
/// Always `INSN_SIZE`; the addresses and hints are ignored.
fn encoded_jump_size(&self, _source_ip: u64, _target: u64, _hints: EncodeHints) -> usize {
INSN_SIZE
}
/// Size in bytes of an encoded conditional jump.
///
/// Always `INSN_SIZE`; the addresses and hints are ignored.
fn encoded_cond_jump_size(&self, _source_ip: u64, _target: u64, _hints: EncodeHints) -> usize {
INSN_SIZE
}
/// Size in bytes of an encoded call.
///
/// Always `INSN_SIZE`; the addresses and hints are ignored.
fn encoded_call_size(&self, _source_ip: u64, _target: u64, _hints: EncodeHints) -> usize {
INSN_SIZE
}
/// Always returns `true` for this codec.
///
/// NOTE(review): presumably tells callers that the bytes produced by
/// `encode_call` are themselves a call instruction; confirm against the
/// `ArchCodec` trait documentation.
fn direct_call_bytes_contain_call(&self) -> bool {
true
}
/// Encodes `dst = src` as one BPF instruction, chosen from the operand shapes.
///
/// Either operand may carry a `:u8` / `:u16` / `:u32` / `:u64` width suffix,
/// which is split off by `split_size_suffix`. Shapes are tried in this order:
///
/// 1. register <- wide immediate => `lddw`. Taken when `is_lddw_imm` accepts
///    the source, or when the source is tagged `:u64` and is neither a
///    bracketed memory operand nor a register.
/// 2. register <- `[mem]` => `ldx{b,h,w,dw}`; width from the source suffix,
///    64 bits when absent.
/// 3. `[mem]` <- register => `stx{b,h,w,dw}`; width from the destination
///    suffix, 64 bits when absent.
/// 4. register <- register or immediate => `mov64`.
///
/// Memory operands are passed through `desymbolize_mem` before assembly.
///
/// # Errors
///
/// Returns [`ArchError::Unsupported`] when no shape matches, and
/// [`ArchError::Assemble`] when the assembler rejects the generated text.
fn encode_move(&self, dst: &str, src: &str) -> Result<Vec<u8>, ArchError> {
    let assemble =
        |text: String| assemble_bpf(&text).map_err(|e| ArchError::Assemble(e.to_string()));

    // `split_size_suffix` trims its input and the returned core, so no
    // separate trimming is needed here.
    let (dst_core, dst_size) = split_size_suffix(dst);
    let (src_core, src_size) = split_size_suffix(src);

    let dst_is_reg = is_bpf_reg(dst_core);
    let src_is_reg = is_bpf_reg(src_core);
    let src_is_mem = is_bracket_mem(src_core);

    // A `:u64` tag only selects `lddw` for immediate-like sources. A tagged
    // memory operand must reach the `ldx` branch and a tagged register the
    // `mov64` branch; otherwise `lddw rX, [..]` / `lddw rX, rY` would be
    // emitted and rejected by the assembler.
    let wants_lddw =
        is_lddw_imm(src_core) || (src_size == Some(64) && !src_is_mem && !src_is_reg);
    if dst_is_reg && wants_lddw {
        return assemble(format!("lddw {dst_core}, {src_core}"));
    }

    if dst_is_reg && src_is_mem {
        let suffix = size_suffix_for_bits(src_size.unwrap_or(64))?;
        let mem = desymbolize_mem(src_core);
        return assemble(format!("ldx{suffix} {dst_core}, {mem}"));
    }

    if is_bracket_mem(dst_core) && src_is_reg {
        let suffix = size_suffix_for_bits(dst_size.unwrap_or(64))?;
        let mem = desymbolize_mem(dst_core);
        return assemble(format!("stx{suffix} {mem}, {src_core}"));
    }

    if dst_is_reg && (src_is_reg || is_bpf_imm(src_core)) {
        return assemble(format!("mov64 {dst_core}, {src_core}"));
    }

    Err(ArchError::Unsupported {
        arch: self.name(),
        operation: "move (unrecognised operand shape)",
    })
}
/// Encodes a function return as the `exit` instruction.
///
/// `_value` is ignored.
///
/// # Errors
///
/// Assembler failures become [`ArchError::Assemble`].
fn encode_return(&self, _value: Option<u64>) -> Result<Vec<u8>, ArchError> {
    match assemble_bpf("exit") {
        Ok(bytes) => Ok(bytes),
        Err(err) => Err(ArchError::Assemble(err.to_string())),
    }
}
/// Encodes a compound assignment `dst op src` as a 64-bit ALU instruction.
///
/// `dst` must be a BPF register; `src` must be a register or an immediate.
/// `op` is matched verbatim against the compound-assignment operators in the
/// table below.
///
/// # Errors
///
/// Returns [`ArchError::Unsupported`] for a non-register destination, an
/// unrecognised source shape, or an unknown operator (checked in that order),
/// and [`ArchError::Assemble`] when the assembler rejects the generated text.
fn encode_arith(&self, dst: &str, op: &str, src: &str) -> Result<Vec<u8>, ArchError> {
    // Operator token -> assembler mnemonic.
    const MNEMONICS: [(&str, &str); 10] = [
        ("+=", "add64"),
        ("-=", "sub64"),
        ("*=", "mul64"),
        ("/=", "div64"),
        ("%=", "mod64"),
        ("|=", "or64"),
        ("&=", "and64"),
        ("^=", "xor64"),
        ("<<=", "lsh64"),
        (">>=", "rsh64"),
    ];

    let unsupported = |operation: &'static str| ArchError::Unsupported {
        arch: self.name(),
        operation,
    };

    let (lhs, rhs) = (dst.trim(), src.trim());
    if !is_bpf_reg(lhs) {
        return Err(unsupported("arith (non-register dst)"));
    }
    let rhs_supported = is_bpf_reg(rhs) || is_bpf_imm(rhs);
    if !rhs_supported {
        return Err(unsupported("arith (unsupported src shape)"));
    }

    let mnemonic = MNEMONICS
        .iter()
        .find(|&&(symbol, _)| symbol == op)
        .map(|&(_, mnemonic)| mnemonic)
        .ok_or_else(|| unsupported("arith (unsupported op)"))?;

    match assemble_bpf(&format!("{mnemonic} {lhs}, {rhs}")) {
        Ok(bytes) => Ok(bytes),
        Err(err) => Err(ArchError::Assemble(err.to_string())),
    }
}
}
/// Returns `true` when `s`, ignoring surrounding whitespace, is exactly one of
/// the register names `r0` through `r10`.
///
/// The index must be spelled canonically: `r01`, `R1` and `r11` are rejected.
fn is_bpf_reg(s: &str) -> bool {
    const INDICES: [&str; 11] = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"];
    match s.trim().strip_prefix('r') {
        Some(index) => INDICES.contains(&index),
        None => false,
    }
}
/// Returns `true` when `s`, ignoring surrounding whitespace, begins with `[`
/// and ends with `]`.
///
/// Only the outer delimiters are checked; the contents are not validated.
fn is_bracket_mem(s: &str) -> bool {
    let bytes = s.trim().as_bytes();
    // Both delimiters are ASCII, so comparing raw bytes is equivalent to
    // comparing characters.
    matches!(
        (bytes.first(), bytes.last()),
        (Some(&b'['), Some(&b']'))
    )
}
/// Passes a memory operand through `desymbolize_bpf_text` (address `0`, no
/// extra argument) and falls back to the operand text unchanged when the
/// helper yields nothing.
fn desymbolize_mem(operand: &str) -> String {
    match desymbolize_bpf_text(operand, 0, None) {
        Some(rewritten) => rewritten,
        None => operand.to_string(),
    }
}
/// Splits a trailing `:u<bits>` width tag off an operand.
///
/// Returns the operand core (whitespace-trimmed) together with
/// `Some(bits)` when the text after the last `:u` parses as 8, 16, 32 or 64.
/// In every other case the whole trimmed input is returned with `None`.
fn split_size_suffix(s: &str) -> (&str, Option<u32>) {
    let operand = s.trim();
    let Some((core, width)) = operand.rsplit_once(":u") else {
        return (operand, None);
    };
    match width.parse::<u32>() {
        Ok(bits @ (8 | 16 | 32 | 64)) => (core.trim_end(), Some(bits)),
        _ => (operand, None),
    }
}
/// Maps a memory access width in bits to the mnemonic suffix appended to
/// `ldx` / `stx`: 8 => `b`, 16 => `h`, 32 => `w`, 64 => `dw`.
///
/// # Errors
///
/// Returns [`ArchError::OutOfRange`] for any other width.
fn size_suffix_for_bits(bits: u32) -> Result<&'static str, ArchError> {
    const SUFFIXES: [(u32, &str); 4] = [(8, "b"), (16, "h"), (32, "w"), (64, "dw")];
    SUFFIXES
        .iter()
        .find(|&&(width, _)| width == bits)
        .map(|&(_, suffix)| suffix)
        .ok_or_else(|| {
            ArchError::OutOfRange(format!("unsupported memory access width :u{bits}"))
        })
}
/// Returns `true` when `s` is an integer literal whose magnitude exceeds
/// `u32::MAX`, i.e. one that cannot be carried by a 32-bit immediate.
///
/// Accepted forms, after trimming whitespace and one optional leading `-`:
/// a `0x`-prefixed hexadecimal literal, or a plain decimal literal. Decimal
/// literals are recognised so that they are treated the same way as their
/// hexadecimal spelling. Literals that do not fit in a `u64`, and anything
/// that is not a literal, yield `false`.
fn is_lddw_imm(s: &str) -> bool {
    let s = s.trim();
    let magnitude = s.strip_prefix('-').unwrap_or(s);
    // Digits are validated explicitly because the std parsers would also
    // accept a leading `+`.
    let parsed = match magnitude.strip_prefix("0x") {
        Some(hex) if !hex.is_empty() && hex.bytes().all(|b| b.is_ascii_hexdigit()) => {
            u64::from_str_radix(hex, 16)
        }
        None if !magnitude.is_empty() && magnitude.bytes().all(|b| b.is_ascii_digit()) => {
            magnitude.parse::<u64>()
        }
        _ => return false,
    };
    parsed.is_ok_and(|value| value > u64::from(u32::MAX))
}
/// Returns `true` when `s` looks like an integer literal.
///
/// After trimming whitespace and one optional leading `-`, the text must be
/// either `0x` followed by at least one hex digit, or a non-empty run of
/// decimal digits. The magnitude is not range-checked.
fn is_bpf_imm(s: &str) -> bool {
    let text = s.trim();
    let magnitude = text.strip_prefix('-').unwrap_or(text);
    match magnitude.strip_prefix("0x") {
        Some(hex) => !hex.is_empty() && hex.bytes().all(|b| b.is_ascii_hexdigit()),
        None => !magnitude.is_empty() && magnitude.bytes().all(|b| b.is_ascii_digit()),
    }
}
/// Registers this module's codec `factory` with `ud_arch_codec::register`.
pub fn register() {
ud_arch_codec::register(factory);
}
/// `e_machine` value that `factory` maps to [`BpfVariant::Linux`].
// NOTE(review): presumably the ELF `e_machine` constant for BPF; confirm.
pub const EM_BPF: u64 = 247;
/// `e_machine` value that `factory` maps to [`BpfVariant::Sbfv1`].
// NOTE(review): presumably the ELF `e_machine` constant for SBF; confirm.
pub const EM_SBF: u64 = 263;
/// Builds a boxed [`BpfCodec`] for a target described by machine id and/or
/// architecture name.
///
/// A recognised `e_machine` (`EM_BPF` or `EM_SBF`) takes precedence. Otherwise
/// `arch_name` is matched against `"bpf"`, `"sbf"` / `"sbfv1"` and `"sbfv2"`.
/// Returns `None` when neither input identifies a supported variant.
fn factory(arch_name: Option<&str>, e_machine: Option<u64>) -> Option<Box<dyn ArchCodec>> {
    let variant = match (e_machine, arch_name) {
        (Some(EM_BPF), _) => BpfVariant::Linux,
        (Some(EM_SBF), _) => BpfVariant::Sbfv1,
        (_, Some("bpf")) => BpfVariant::Linux,
        (_, Some("sbf" | "sbfv1")) => BpfVariant::Sbfv1,
        (_, Some("sbfv2")) => BpfVariant::Sbfv2,
        _ => return None,
    };
    Some(Box::new(BpfCodec(variant)))
}