1use crate::{
14 assemble_bpf, assemble_bpf_ifblock_cond, assemble_bpf_ja, desymbolize_bpf_text, BpfVariant,
15 INSN_SIZE,
16};
17use ud_arch_codec::{ArchCodec, ArchError, EncodeHints, SwitchSpec};
18
19#[derive(Debug, Clone, Copy)]
21pub struct BpfCodec(pub BpfVariant);
22
23impl BpfCodec {
24 pub const LINUX: Self = Self(BpfVariant::Linux);
26 pub const SBF_V1: Self = Self(BpfVariant::Sbfv1);
28 pub const SBF_V2: Self = Self(BpfVariant::Sbfv2);
30}
31
32fn slot_offset(source_ip: u64, target: u64) -> Result<i16, ArchError> {
37 let next_slot = source_ip.wrapping_add(INSN_SIZE as u64);
38 #[allow(clippy::cast_possible_wrap)]
39 let delta = (target as i64).wrapping_sub(next_slot as i64);
40 if delta % (INSN_SIZE as i64) != 0 {
41 return Err(ArchError::OutOfRange(format!(
42 "BPF branch displacement {delta} bytes is not slot-aligned"
43 )));
44 }
45 let slots = delta / (INSN_SIZE as i64);
46 i16::try_from(slots).map_err(|_| {
47 ArchError::OutOfRange(format!(
48 "BPF branch displacement {slots} slots overflows i16 (max ±32768)"
49 ))
50 })
51}
52
53impl ArchCodec for BpfCodec {
54 fn name(&self) -> &'static str {
55 match self.0 {
56 BpfVariant::Linux => "bpf-linux",
57 BpfVariant::Sbfv1 => "bpf-sbf-v1",
58 BpfVariant::Sbfv2 => "bpf-sbf-v2",
59 }
60 }
61
62 fn assemble_one(&self, text: &str, _addr: u64) -> Result<Vec<u8>, ArchError> {
63 assemble_bpf(text).map_err(|e| ArchError::Assemble(e.to_string()))
64 }
65
66 fn desymbolize(&self, text: &str, addr: u64) -> String {
67 desymbolize_bpf_text(text, addr, None).unwrap_or_else(|| text.to_string())
68 }
69
70 fn encode_jump(
71 &self,
72 source_ip: u64,
73 target: u64,
74 _hints: EncodeHints,
75 ) -> Result<Vec<u8>, ArchError> {
76 let off = slot_offset(source_ip, target)?;
77 assemble_bpf_ja(off).map_err(|e| ArchError::Assemble(e.to_string()))
78 }
79
80 fn encode_call(
97 &self,
98 source_ip: u64,
99 target: u64,
100 hints: EncodeHints,
101 ) -> Result<Vec<u8>, ArchError> {
102 let slots = slot_offset(source_ip, target)?;
103 let imm32 = i32::from(slots);
104 let mnemonic = if hints.bpf_call_local.unwrap_or(false) {
105 "call_local"
106 } else {
107 "call_internal"
108 };
109 assemble_bpf(&format!("{mnemonic} {imm32}")).map_err(|e| ArchError::Assemble(e.to_string()))
110 }
111
112 fn encode_cond_jump(
113 &self,
114 cond_text: &str,
115 source_ip: u64,
116 target: u64,
117 _hints: EncodeHints,
118 ) -> Result<Vec<u8>, ArchError> {
119 let off = slot_offset(source_ip, target)?;
120 assemble_bpf_ifblock_cond(cond_text, off).map_err(|e| ArchError::Assemble(e.to_string()))
121 }
122
123 fn encode_switch_dispatch(&self, _spec: &SwitchSpec) -> Result<Vec<u8>, ArchError> {
124 Err(ArchError::Unsupported {
127 arch: self.name(),
128 operation: "switch_dispatch",
129 })
130 }
131
132 fn encoded_jump_size(&self, _source_ip: u64, _target: u64, _hints: EncodeHints) -> usize {
133 INSN_SIZE
134 }
135
136 fn encoded_cond_jump_size(&self, _source_ip: u64, _target: u64, _hints: EncodeHints) -> usize {
137 INSN_SIZE
138 }
139
140 fn encoded_call_size(&self, _source_ip: u64, _target: u64, _hints: EncodeHints) -> usize {
141 INSN_SIZE
142 }
143
144 fn direct_call_bytes_contain_call(&self) -> bool {
147 true
148 }
149
150 fn encode_move(&self, dst: &str, src: &str) -> Result<Vec<u8>, ArchError> {
168 let dst = dst.trim();
169 let src = src.trim();
170 let (dst_core, dst_size) = split_size_suffix(dst);
175 let (src_core, src_size) = split_size_suffix(src);
176
177 if is_bpf_reg(dst_core) && (src_size == Some(64) || is_lddw_imm(src_core)) {
179 let imm_str = src_core.trim();
180 return assemble_bpf(&format!("lddw {dst_core}, {imm_str}"))
181 .map_err(|e| ArchError::Assemble(e.to_string()));
182 }
183
184 if is_bpf_reg(dst_core) && is_bracket_mem(src_core) {
186 let bits = src_size.unwrap_or(64);
187 let suffix = size_suffix_for_bits(bits)?;
188 let mem = desymbolize_mem(src_core);
189 return assemble_bpf(&format!("ldx{suffix} {dst_core}, {mem}"))
190 .map_err(|e| ArchError::Assemble(e.to_string()));
191 }
192
193 if is_bracket_mem(dst_core) && is_bpf_reg(src_core) {
195 let bits = dst_size.unwrap_or(64);
196 let suffix = size_suffix_for_bits(bits)?;
197 let mem = desymbolize_mem(dst_core);
198 return assemble_bpf(&format!("stx{suffix} {mem}, {src_core}"))
199 .map_err(|e| ArchError::Assemble(e.to_string()));
200 }
201
202 if is_bpf_reg(dst_core) && (is_bpf_reg(src_core) || is_bpf_imm(src_core)) {
204 return assemble_bpf(&format!("mov64 {dst_core}, {src_core}"))
205 .map_err(|e| ArchError::Assemble(e.to_string()));
206 }
207
208 Err(ArchError::Unsupported {
209 arch: self.name(),
210 operation: "move (unrecognised operand shape)",
211 })
212 }
213
214 fn encode_return(&self, _value: Option<u64>) -> Result<Vec<u8>, ArchError> {
217 assemble_bpf("exit").map_err(|e| ArchError::Assemble(e.to_string()))
218 }
219
220 fn encode_arith(&self, dst: &str, op: &str, src: &str) -> Result<Vec<u8>, ArchError> {
229 let dst = dst.trim();
230 let src = src.trim();
231 if !is_bpf_reg(dst) {
232 return Err(ArchError::Unsupported {
233 arch: self.name(),
234 operation: "arith (non-register dst)",
235 });
236 }
237 if !(is_bpf_reg(src) || is_bpf_imm(src)) {
238 return Err(ArchError::Unsupported {
239 arch: self.name(),
240 operation: "arith (unsupported src shape)",
241 });
242 }
243 let mnemonic = match op {
244 "+=" => "add64",
245 "-=" => "sub64",
246 "*=" => "mul64",
247 "/=" => "div64",
248 "%=" => "mod64",
249 "|=" => "or64",
250 "&=" => "and64",
251 "^=" => "xor64",
252 "<<=" => "lsh64",
253 ">>=" => "rsh64",
254 _ => {
255 return Err(ArchError::Unsupported {
256 arch: self.name(),
257 operation: "arith (unsupported op)",
258 });
259 }
260 };
261 assemble_bpf(&format!("{mnemonic} {dst}, {src}"))
262 .map_err(|e| ArchError::Assemble(e.to_string()))
263 }
264}
265
/// Reports whether `s` (ignoring surrounding whitespace) is exactly one of `r0` through `r10`.
fn is_bpf_reg(s: &str) -> bool {
    // Canonical spellings only: `r01`, `r11` or a bare `r` are rejected.
    const REG_INDICES: [&str; 11] = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"];
    s.trim()
        .strip_prefix('r')
        .is_some_and(|index| REG_INDICES.contains(&index))
}
278
/// Reports whether `s` (ignoring surrounding whitespace) opens with `[` and closes with `]`.
fn is_bracket_mem(s: &str) -> bool {
    // Both delimiters are ASCII, so inspecting the first and last byte is equivalent to the
    // character-level check.
    let bytes = s.trim().as_bytes();
    matches!((bytes.first(), bytes.last()), (Some(b'['), Some(b']')))
}
286
287fn desymbolize_mem(operand: &str) -> String {
294 desymbolize_bpf_text(operand, 0, None).unwrap_or_else(|| operand.to_string())
295}
296
/// Splits a trailing `:u8` / `:u16` / `:u32` / `:u64` width annotation off an operand.
///
/// Returns the operand without the annotation (trailing whitespace removed) together with
/// the width in bits. When no recognised annotation is present, the trimmed input is returned
/// with `None`.
///
/// Only the four canonical spellings are accepted; forms that merely parse to the same
/// number (`:u+8`, `:u008`) are treated as part of the operand.
fn split_size_suffix(s: &str) -> (&str, Option<u32>) {
    let s = s.trim();
    // Split at the last `:u` so an earlier `:u` inside the operand does not confuse parsing.
    let Some((core, width)) = s.rsplit_once(":u") else {
        return (s, None);
    };
    let bits = match width {
        "8" => 8,
        "16" => 16,
        "32" => 32,
        "64" => 64,
        _ => return (s, None),
    };
    (core.trim_end(), Some(bits))
}
312
313fn size_suffix_for_bits(bits: u32) -> Result<&'static str, ArchError> {
315 match bits {
316 8 => Ok("b"),
317 16 => Ok("h"),
318 32 => Ok("w"),
319 64 => Ok("dw"),
320 _ => Err(ArchError::OutOfRange(format!(
321 "unsupported memory access width :u{bits}"
322 ))),
323 }
324}
325
/// Reports whether `s` is an integer literal whose magnitude exceeds `u32::MAX`, i.e. one
/// that needs the 64-bit immediate load.
///
/// Accepts an optional leading `-`, then either a `0x`-prefixed hex literal or a plain
/// decimal literal. Anything else — including literals too large for `u64` — yields `false`.
fn is_lddw_imm(s: &str) -> bool {
    let s = s.trim();
    let magnitude = s.strip_prefix('-').unwrap_or(s);
    // The digit checks run before parsing because the integer parsers would otherwise accept
    // a leading `+`.
    let parsed = match magnitude.strip_prefix("0x") {
        Some(hex) if !hex.is_empty() && hex.bytes().all(|b| b.is_ascii_hexdigit()) => {
            u64::from_str_radix(hex, 16).ok()
        }
        Some(_) => None,
        None if !magnitude.is_empty() && magnitude.bytes().all(|b| b.is_ascii_digit()) => {
            magnitude.parse::<u64>().ok()
        }
        None => None,
    };
    parsed.is_some_and(|value| value > u64::from(u32::MAX))
}
343
/// Reports whether `s` looks like an integer literal: an optional leading `-` followed by
/// either `0x` plus at least one hex digit, or at least one decimal digit.
fn is_bpf_imm(s: &str) -> bool {
    let trimmed = s.trim();
    let unsigned = trimmed.strip_prefix('-').unwrap_or(trimmed);
    match unsigned.strip_prefix("0x") {
        Some(hex_digits) => {
            !hex_digits.is_empty() && hex_digits.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => !unsigned.is_empty() && unsigned.bytes().all(|b| b.is_ascii_digit()),
    }
}
355
/// Registers this module's codec factory with `ud_arch_codec`.
///
/// The registered `factory` function decides, from an architecture name and/or an ELF
/// `e_machine` value, whether to hand out a [`BpfCodec`].
pub fn register() {
    ud_arch_codec::register(factory);
}
366
/// ELF `e_machine` value that `factory` maps to the Linux BPF codec.
pub const EM_BPF: u64 = 247;
/// ELF `e_machine` value that `factory` maps to the SBF v1 codec.
pub const EM_SBF: u64 = 263;
372
373fn factory(arch_name: Option<&str>, e_machine: Option<u64>) -> Option<Box<dyn ArchCodec>> {
374 if let Some(em) = e_machine {
375 match em {
376 EM_BPF => return Some(Box::new(BpfCodec(BpfVariant::Linux))),
377 EM_SBF => return Some(Box::new(BpfCodec(BpfVariant::Sbfv1))),
378 _ => {}
379 }
380 }
381 match arch_name {
382 Some("bpf") => Some(Box::new(BpfCodec(BpfVariant::Linux))),
383 Some("sbf" | "sbfv1") => Some(Box::new(BpfCodec(BpfVariant::Sbfv1))),
384 Some("sbfv2") => Some(Box::new(BpfCodec(BpfVariant::Sbfv2))),
385 _ => None,
386 }
387}