Skip to main content

ud_arch_bpf/
assemble.rs

1//! BPF text → bytes assembler.
2//!
3//! The inverse of [`format_insn`]: given the raw textual form
4//! the decoder produces (e.g. `"ldxdw r0, [r5 - 0xff8]"`,
5//! `"jeq r0, 0x0, +0x7"`, `"mov64 r1, r2"`), emit the 8-byte
6//! BPF slot encoding. Combined with a decompile-time
7//! byte-drop pass, this turns the `@asm("text", [bytes])`
8//! pairs in `.ud` source into `@asm("text")` — the bytes
9//! become regenerable from the text alone.
10//!
11//! ## Round-trip contract
12//!
13//! For every opcode this assembler recognises:
14//!
15//! > `assemble(format_insn(decode(bytes)).text) == bytes`
16//!
17//! This makes the "decompile → recompile → byte-identical"
18//! test meaningful: the text layer can no longer hide
19//! encoding bugs the way it could when bytes were
20//! shadow-pinned next to every `@asm`.
21//!
22//! ## Scope (phase 1)
23//!
24//! Handles every "pure" form `format_insn` emits — numeric
25//! operands only:
26//!
27//! * Loads: `ldx{w,h,b,dw} rD, [rS]` / `[rS + 0xN]` / `[rS - 0xN]`
28//! * Stores: `st{w,h,b,dw} [rD ±off], 0xN` and `stx{w,h,b,dw} [rD ±off], rS`
29//! * ALU (32 / 64): `add/sub/mul/div/mod/or/and/lsh/rsh/arsh/xor/mov/neg`
30//!   (+ sBPFv2's `udiv/urem/sdiv/srem`); reg or imm source.
31//! * Endian: `le16/le32/le64/be16/be32/be64 rD`
32//! * Branches: `ja ±0x…`, `j{eq,ne,gt,lt,ge,le,sgt,slt,sge,sle,set}{32?} rD, rhs, ±0x…`
33//! * Calls: `call 0xN`, `callx rD`
34//! * `exit`
35//! * `lddw rD, 0x…` (first slot) + `<lddw-cont 0x…>` (second slot)
36//! * `<bpf 0xNNNN…>` fallback — raw u64 → 8 bytes
37//!
38//! Symbolic forms (`call sub_X`, `jeq …, label_Y`,
39//! `lddw r1, "string"`) are out of scope here. They land
40//! later via a symbol-resolution layer in the translation
41//! crate — at which point those texts are de-symbolised back
42//! to the pure forms this module accepts.
43//!
44//! [`format_insn`]: super::format_insn
45
46use crate::INSN_SIZE;
47
/// Errors the assembler surfaces. Each one points at the
/// specific shape that failed to parse / encode, so the
/// decompile-time byte-drop pass can keep bytes pinned for
/// the lines we can't yet handle (typically symbolic forms).
#[derive(Debug, thiserror::Error)]
pub enum AssembleError {
    /// The input was empty once surrounding whitespace was
    /// trimmed.
    #[error("empty text")]
    Empty,
    /// The mnemonic is not in any dispatch table. Also used
    /// for an if-block comparison operator that has no jcc
    /// counterpart.
    #[error("unknown mnemonic {0:?}")]
    UnknownMnemonic(String),
    /// An operand failed to parse. Carries the offending
    /// text and a short label naming what was being parsed
    /// (e.g. `"register"`, `"branch offset"`).
    #[error("malformed operand {0:?} for {1}")]
    BadOperand(String, &'static str),
    /// The operand count differs from what the mnemonic
    /// requires.
    #[error("expected {expected} operands for {mnemonic}, got {got}")]
    WrongArity {
        mnemonic: String,
        expected: usize,
        got: usize,
    },
    /// An immediate is too large for the instruction's
    /// `bits`-wide imm field.
    #[error("immediate {value:#x} doesn't fit in {bits} bits")]
    ImmediateOverflow { value: u64, bits: u8 },
    /// A signed offset is outside the i16 range. Raised for
    /// memory-operand displacements as well as branch
    /// offsets, despite the wording of the message.
    #[error("branch offset {0} doesn't fit in i16")]
    OffsetOverflow(i64),
    /// A register number above 10 was named.
    #[error("register {0} out of range 0..=10")]
    BadRegister(u32),
    /// The text matches none of the supported shapes: a
    /// malformed `<bpf …>` / `<lddw-cont …>` form, or an
    /// if-block condition that is composite or has no
    /// comparison operator.
    #[error("not a known textual form")]
    NotRecognised,
}
75
76/// Assemble one BPF instruction text into its 8-byte slot
77/// encoding. The address argument is currently unused — BPF
78/// branch offsets are encoded as slot-relative i16 values
79/// taken directly from the text, so the assembler doesn't
80/// need to know where in the function it lives.
81///
82/// Returns 8 bytes on success.
83pub fn assemble_bpf(text: &str) -> Result<Vec<u8>, AssembleError> {
84    let text = text.trim();
85    if text.is_empty() {
86        return Err(AssembleError::Empty);
87    }
88
89    // Fallback: raw `<bpf 0xNNNN…>` form for opcodes the
90    // decoder couldn't classify. Encode the u64 directly.
91    if let Some(rest) = text.strip_prefix("<bpf 0x") {
92        let hex = rest.strip_suffix('>').ok_or(AssembleError::NotRecognised)?;
93        let v = u64::from_str_radix(hex, 16).map_err(|_| AssembleError::NotRecognised)?;
94        return Ok(v.to_le_bytes().to_vec());
95    }
96
97    // LDDW continuation slot.
98    if let Some(rest) = text.strip_prefix("<lddw-cont 0x") {
99        let hex = rest.strip_suffix('>').ok_or(AssembleError::NotRecognised)?;
100        let high = u32::from_str_radix(hex, 16).map_err(|_| AssembleError::NotRecognised)?;
101        return Ok(encode_slot(0x00, 0, 0, 0, high as i32));
102    }
103
104    if text == "exit" {
105        return Ok(encode_slot(0x95, 0, 0, 0, 0));
106    }
107
108    // Split into mnemonic + operand list.
109    let (mnemonic, rest) = match text.find(char::is_whitespace) {
110        Some(i) => (&text[..i], text[i..].trim()),
111        None => (text, ""),
112    };
113    let operands = split_operands(rest);
114
115    match mnemonic {
116        // ---------------- LDDW + LD/LDX ----------------
117        "lddw" => assemble_lddw(&operands),
118        "ldxw" | "ldxh" | "ldxb" | "ldxdw" => assemble_ldx(mnemonic, &operands),
119
120        // ---------------- ST / STX ----------------
121        "stw" | "sth" | "stb" | "stdw" => assemble_st(mnemonic, &operands),
122        "stxw" | "stxh" | "stxb" | "stxdw" => assemble_stx(mnemonic, &operands),
123
124        // ---------------- Endian ----------------
125        // Mnemonic forms: le16/le32/le64/be16/be32/be64.
126        "le16" | "le32" | "le64" | "be16" | "be32" | "be64" => assemble_endian(mnemonic, &operands),
127
128        // ---------------- Control flow ----------------
129        "ja" => assemble_ja(&operands),
130        "call" => assemble_call(&operands, /* src=*/ 0),
131        // BPF-to-BPF intra-program calls have two encodings
132        // in the wild:
133        //
134        //   * Solana sBPF: opcode `0x85`, src=1, imm=signed
135        //     slot count. Emitted by `format_insn` as
136        //     `call <hex>` (same as syscall — the src nibble
137        //     distinguishes them in the byte stream).
138        //   * Linux eBPF + a few toolchains that misuse
139        //     `EM_BPF` for SBF: opcode `0x8d`, src=0,
140        //     imm=signed slot count.
141        //
142        // Neither is in `format_insn`'s output directly —
143        // the de-symbolizer in `ud-translate` rewrites the
144        // user-facing `call sub_<hex>` text into one of the
145        // two mnemonics below based on the original byte's
146        // opcode, so the byte-drop pass can recover the
147        // exact encoding.
148        "call_internal" => assemble_call(&operands, /* src=*/ 1),
149        "call_local" => assemble_call_local(&operands),
150        "callx" => assemble_callx(&operands),
151
152        // ---------------- ALU + jumps with suffix ----------------
153        other => assemble_alu_or_jmp(other, &operands),
154    }
155}
156
157// ────────────────────────────────────────────────────────────
158//  Encoders
159// ────────────────────────────────────────────────────────────
160
161fn encode_slot(opcode: u8, dst: u8, src: u8, offset: i16, imm: i32) -> Vec<u8> {
162    let mut out = vec![0u8; INSN_SIZE];
163    out[0] = opcode;
164    out[1] = (dst & 0x0f) | ((src & 0x0f) << 4);
165    out[2..4].copy_from_slice(&offset.to_le_bytes());
166    out[4..8].copy_from_slice(&imm.to_le_bytes());
167    out
168}
169
170fn assemble_lddw(operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
171    arity(operands, 2, "lddw")?;
172    let dst = parse_reg(operands[0])?;
173    let imm64 = parse_uint(operands[1], "lddw")?;
174    // First slot: opcode 0x18, dst=dst, src=0, imm=low32.
175    #[allow(clippy::cast_possible_truncation)]
176    let low = imm64 as u32 as i32;
177    Ok(encode_slot(0x18, dst, 0, 0, low))
178}
179
/// BPF MEM mode bits (the top three bits of the opcode
/// byte). All memory-class instructions assembled in this
/// file share `0x60`; the load/store encoders OR it with
/// the size bits and the instruction class. Legacy LD_ABS /
/// LD_IND use other mode bits and aren't handled here
/// (they don't appear in modern BPF binaries).
const BPF_MODE_MEM: u8 = 0x60;
185
186fn assemble_ldx(mnemonic: &str, operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
187    arity(operands, 2, mnemonic)?;
188    let dst = parse_reg(operands[0])?;
189    let (src, offset) = parse_mem(operands[1])?;
190    let size_bits = size_letter_to_bits(&mnemonic[3..]);
191    // LDX class = 0x01; size in bits 3..4; MEM mode = 0x60.
192    let opcode = BPF_MODE_MEM | size_bits | 0x01;
193    Ok(encode_slot(opcode, dst, src, offset, 0))
194}
195
196fn assemble_st(mnemonic: &str, operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
197    arity(operands, 2, mnemonic)?;
198    let (dst, offset) = parse_mem(operands[0])?;
199    let imm = parse_int(operands[1], "st")?;
200    let size_bits = size_letter_to_bits(&mnemonic[2..]);
201    // ST class = 0x02 (imm source); size in bits 3..4; MEM mode = 0x60.
202    let opcode = BPF_MODE_MEM | size_bits | 0x02;
203    Ok(encode_slot(opcode, dst, 0, offset, imm))
204}
205
206fn assemble_stx(mnemonic: &str, operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
207    arity(operands, 2, mnemonic)?;
208    let (dst, offset) = parse_mem(operands[0])?;
209    let src = parse_reg(operands[1])?;
210    let size_bits = size_letter_to_bits(&mnemonic[3..]);
211    // STX class = 0x03 (reg source); size in bits 3..4; MEM mode = 0x60.
212    let opcode = BPF_MODE_MEM | size_bits | 0x03;
213    Ok(encode_slot(opcode, dst, src, offset, 0))
214}
215
/// Map a load/store width suffix to the opcode's size bits:
/// `h` → `0x08`, `b` → `0x10`, `dw` → `0x18`.
///
/// `w` and any unknown suffix both yield `0x00` (word
/// width). No error is reported for an unknown suffix here.
fn size_letter_to_bits(suffix: &str) -> u8 {
    const NON_WORD_WIDTHS: &[(&str, u8)] = &[("h", 0x08), ("b", 0x10), ("dw", 0x18)];

    NON_WORD_WIDTHS
        .iter()
        .find(|entry| entry.0 == suffix)
        .map_or(0x00, |entry| entry.1)
}
228
229fn assemble_endian(mnemonic: &str, operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
230    arity(operands, 1, mnemonic)?;
231    let dst = parse_reg(operands[0])?;
232    // "le" → opcode 0xd4 (ALU class + END op-nibble + imm
233    // source). "be" → 0xdc (reg-source bit set).
234    let opcode = match &mnemonic[..2] {
235        "le" => 0xd4,
236        "be" => 0xdc,
237        _ => return Err(AssembleError::UnknownMnemonic(mnemonic.into())),
238    };
239    let width: i32 = match &mnemonic[2..] {
240        "16" => 16,
241        "32" => 32,
242        "64" => 64,
243        _ => return Err(AssembleError::UnknownMnemonic(mnemonic.into())),
244    };
245    Ok(encode_slot(opcode, dst, 0, 0, width))
246}
247
248fn assemble_ja(operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
249    arity(operands, 1, "ja")?;
250    let off = parse_branch_offset(operands[0])?;
251    Ok(encode_slot(0x05, 0, 0, off, 0))
252}
253
254fn assemble_call(operands: &[&str], src: u8) -> Result<Vec<u8>, AssembleError> {
255    arity(operands, 1, "call")?;
256    let imm = parse_int_signed(operands[0], "call")?;
257    Ok(encode_slot(0x85, 0, src, 0, imm))
258}
259
260/// Linux BPF-to-BPF call: opcode `0x8d`, dst=0, src=0,
261/// imm=signed slot count.
262fn assemble_call_local(operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
263    arity(operands, 1, "call_local")?;
264    let imm = parse_int_signed(operands[0], "call_local")?;
265    Ok(encode_slot(0x8d, 0, 0, 0, imm))
266}
267
268/// Like [`parse_int`] but accepts a leading `-` so callers
269/// can pass signed slot counts (used by the desymbolised
270/// `call_internal` form, whose imm may be negative when
271/// calling a function earlier in the section).
272/// Encode the jcc instruction that drives an `ifblock` /
273/// `whileblock`'s framing.
274///
275/// `cond_text` is the *inverted* condition the renderer
276/// produces (the body runs when this is true; the jcc takes
277/// the branch when it's false). The mapping back to a BPF
278/// jcc mnemonic mirrors `invert_bpf_cond` in
279/// `decompile/bpf.rs`:
280///
281///   * `!=`  → `jeq`    (jeq takes when ==, body runs when !=)
282///   * `==`  → `jne`
283///   * `<=`  → `jgt`    (the unsigned form; `jsgt` for signed)
284///   * `<`   → `jge`
285///   * `>=`  → `jlt`
286///   * `>`   → `jle`
287///
288/// `slot_offset` is the BPF-relative slot count the jcc
289/// must skip — typically the body's lowered size measured
290/// in 8-byte slots, beyond the slot immediately after the
291/// jcc itself.
292///
293/// Returns the 8-byte encoded slot, or an error if
294/// `cond_text` has an unsupported shape (`jset`,
295/// composite expressions, etc.). The byte-drop pass treats
296/// the error as "keep the original bytes pinned."
297pub fn assemble_bpf_ifblock_cond(
298    cond_text: &str,
299    slot_offset: i16,
300) -> Result<Vec<u8>, AssembleError> {
301    let (lhs, op, rhs) = parse_ifblock_cond(cond_text)?;
302    let mnemonic = match op {
303        "!=" => "jeq",
304        "==" => "jne",
305        "<=" => "jgt",
306        "<" => "jge",
307        ">=" => "jlt",
308        ">" => "jle",
309        _ => return Err(AssembleError::UnknownMnemonic(op.into())),
310    };
311    let offset_text = if slot_offset >= 0 {
312        format!("+0x{slot_offset:x}")
313    } else {
314        format!("-0x{:x}", -i32::from(slot_offset))
315    };
316    assemble_bpf(&format!("{mnemonic} {lhs}, {rhs}, {offset_text}"))
317}
318
319/// Convenience: encode `ja +offset` / `ja -offset`. Used
320/// for `then_tail_jmp` (jumps over an else body) and
321/// `tail_bytes` (back-edge of a while loop). Always 8 bytes.
322pub fn assemble_bpf_ja(slot_offset: i16) -> Result<Vec<u8>, AssembleError> {
323    let offset_text = if slot_offset >= 0 {
324        format!("+0x{slot_offset:x}")
325    } else {
326        format!("-0x{:x}", -i32::from(slot_offset))
327    };
328    assemble_bpf(&format!("ja {offset_text}"))
329}
330
331/// Split an inverted-condition string of the shape
332/// `"rA op rB"` or `"rA op 0xN"` into `(lhs, op, rhs)`.
333/// Returns `Err(NotRecognised)` for any composite form
334/// (`(rA & rB) == 0` for `jset`, multi-clause expressions,
335/// etc.).
336fn parse_ifblock_cond(cond: &str) -> Result<(&str, &str, &str), AssembleError> {
337    let cond = cond.trim();
338    if cond.starts_with('(') {
339        // Composite (jset / nested) — out of scope.
340        return Err(AssembleError::NotRecognised);
341    }
342    // Two-operator ops must come first so the single-char
343    // splits don't grab them: scan for "!=", "==", "<=", ">="
344    // before "<" / ">".
345    for op in ["!=", "==", "<=", ">="] {
346        if let Some(at) = find_top_level_op(cond, op) {
347            let lhs = cond[..at].trim();
348            let rhs = cond[at + op.len()..].trim();
349            return Ok((lhs, op, rhs));
350        }
351    }
352    for op in ["<", ">"] {
353        if let Some(at) = find_top_level_op(cond, op) {
354            let lhs = cond[..at].trim();
355            let rhs = cond[at + op.len()..].trim();
356            return Ok((lhs, op, rhs));
357        }
358    }
359    Err(AssembleError::NotRecognised)
360}
361
/// Byte index of the first occurrence of `op` in `cond`.
///
/// This is a plain substring search. It is the caller's
/// probing order (`<=` before `<`) that keeps a
/// single-character operator from matching inside a
/// two-character one.
fn find_top_level_op(cond: &str, op: &str) -> Option<usize> {
    cond.match_indices(op).next().map(|(at, _)| at)
}
367
368/// Helper for `desymbolize_bpf_text`: compute the signed
369/// slot offset between two instruction addresses, expressed
370/// in BPF slot units (8 bytes). Returns `None` when the
371/// delta isn't slot-aligned (would never happen for a
372/// well-formed BPF binary but defensively guarded).
373fn slot_offset_from(target: u64, insn_addr: u64) -> Option<i64> {
374    let next_slot = insn_addr.wrapping_add(INSN_SIZE as u64);
375    #[allow(clippy::cast_possible_wrap)]
376    let delta = (target as i64).wrapping_sub(next_slot as i64);
377    if delta % (INSN_SIZE as i64) != 0 {
378        return None;
379    }
380    Some(delta / (INSN_SIZE as i64))
381}
382
/// Decide whether the operand of a textual `call <name>`
/// (or `call_local <name>`) is a symbolic function name —
/// `abort`, `sol_log_`, `entrypoint`, … — rather than
/// something the assembler or another rewrite handles.
///
/// Returns `false` for:
///
/// * empty / whitespace-only text;
/// * anything whose first character is not a letter or `_`.
///   This covers hex and decimal literals, and also signed
///   literals such as `-1` and bracketed operands, which
///   must not be mistaken for names;
/// * `sub_…` placeholders (handled by the intra-program
///   call rewrite);
/// * text containing `,` (a multi-operand shape such as
///   `r1, r2, label_X`).
fn is_symbolic_callee(name: &str) -> bool {
    let name = name.trim();
    let first = match name.chars().next() {
        Some(c) => c,
        None => return false,
    };
    // A name starts with a letter or `_`. Rejecting only
    // digits would let `-1` / `+5` through and turn a
    // numeric `call -1` into a placeholder call.
    if !(first.is_alphabetic() || first == '_') {
        return false;
    }
    if name.starts_with("sub_") {
        return false;
    }
    !name.contains(',')
}
414
415fn parse_int_signed(text: &str, ctx: &'static str) -> Result<i32, AssembleError> {
416    let t = text.trim();
417    if let Some(rest) = t.strip_prefix('-') {
418        let v = parse_uint(rest, ctx)?;
419        if v > 0x8000_0000 {
420            return Err(AssembleError::ImmediateOverflow { value: v, bits: 32 });
421        }
422        #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
423        return Ok(-(v as i64) as i32);
424    }
425    parse_int(t, ctx)
426}
427
428fn assemble_callx(operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
429    arity(operands, 1, "callx")?;
430    let dst = parse_reg(operands[0])?;
431    // callx encodes its register in `dst`; offset/imm zero.
432    Ok(encode_slot(0x8d, dst, 0, 0, 0))
433}
434
435/// All ALU and conditional-jump mnemonics share the same
436/// "{op}{suffix} dst, rhs" or "{op}{32} dst, rhs, +off"
437/// surface. Dispatch by stripping the size suffix
438/// ("32"/"64") and looking the op nibble up.
439fn assemble_alu_or_jmp(mnemonic: &str, operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
440    // Determine size suffix.
441    let (base, alu64, jmp32) = if let Some(b) = mnemonic.strip_suffix("64") {
442        (b, true, false)
443    } else if let Some(b) = mnemonic.strip_suffix("32") {
444        // Either ALU32 (b is an ALU op like "add") or
445        // JMP32 (b is a jcc like "jeq"). We decide later.
446        (b, false, true)
447    } else {
448        (mnemonic, false, false)
449    };
450
451    // Jump opcodes (with optional 32-bit class).
452    if let Some(op_nibble) = jmp_op_nibble(base) {
453        return assemble_jmp(op_nibble, jmp32, operands);
454    }
455
456    // ALU opcodes. Resolve to op nibble + (alu64 default
457    // when no suffix).
458    let op_nibble =
459        alu_op_nibble(base).ok_or_else(|| AssembleError::UnknownMnemonic(mnemonic.into()))?;
460    let alu_class: u8 = if alu64 { 0x07 } else { 0x04 };
461
462    if op_nibble == 0x8 {
463        // NEG — single-operand unary.
464        arity(operands, 1, mnemonic)?;
465        let dst = parse_reg(operands[0])?;
466        // No source bit (no src reg, no imm — pure unary
467        // is encoded with the imm-source variant by
468        // convention).
469        let opcode = (op_nibble << 4) | alu_class;
470        return Ok(encode_slot(opcode, dst, 0, 0, 0));
471    }
472
473    arity(operands, 2, mnemonic)?;
474    let dst = parse_reg(operands[0])?;
475    let (is_reg, src, imm) = parse_alu_rhs(operands[1])?;
476    let src_bit: u8 = if is_reg { 0x08 } else { 0x00 };
477    let opcode = (op_nibble << 4) | src_bit | alu_class;
478    Ok(encode_slot(opcode, dst, src, 0, imm))
479}
480
481fn assemble_jmp(op_nibble: u8, is_32: bool, operands: &[&str]) -> Result<Vec<u8>, AssembleError> {
482    let mnemonic_for_err = "jcc";
483    if operands.len() != 3 {
484        return Err(AssembleError::WrongArity {
485            mnemonic: mnemonic_for_err.into(),
486            expected: 3,
487            got: operands.len(),
488        });
489    }
490    let dst = parse_reg(operands[0])?;
491    let (is_reg, src, imm) = parse_alu_rhs(operands[1])?;
492    let off = parse_branch_offset(operands[2])?;
493    let src_bit: u8 = if is_reg { 0x08 } else { 0x00 };
494    let class: u8 = if is_32 { 0x06 } else { 0x05 };
495    let opcode = (op_nibble << 4) | src_bit | class;
496    Ok(encode_slot(opcode, dst, src, off, imm))
497}
498
/// Look up the operation nibble (high four bits of the
/// opcode byte) for an ALU mnemonic stem with its size
/// suffix already removed.
///
/// `div`/`udiv` share a nibble, as do `mod`/`urem`; only
/// the textual name differs between the variants. Returns
/// `None` for an unknown stem.
fn alu_op_nibble(base: &str) -> Option<u8> {
    const ALU_OPS: &[(&str, u8)] = &[
        ("add", 0x0),
        ("sub", 0x1),
        ("mul", 0x2),
        ("div", 0x3),
        ("udiv", 0x3),
        ("or", 0x4),
        ("and", 0x5),
        ("lsh", 0x6),
        ("rsh", 0x7),
        ("neg", 0x8),
        ("mod", 0x9),
        ("urem", 0x9),
        ("xor", 0xa),
        ("mov", 0xb),
        ("arsh", 0xc),
        ("sdiv", 0xe),
        ("srem", 0xf),
    ];

    ALU_OPS
        .iter()
        .find(|entry| entry.0 == base)
        .map(|entry| entry.1)
}
522
/// Look up the operation nibble (high four bits of the
/// opcode byte) for a conditional-jump mnemonic stem with
/// any `32` suffix already removed. Returns `None` for a
/// stem that is not a conditional jump (`ja` is not in
/// this table).
fn jmp_op_nibble(base: &str) -> Option<u8> {
    const JUMP_OPS: &[(&str, u8)] = &[
        ("jeq", 0x1),
        ("jgt", 0x2),
        ("jge", 0x3),
        ("jset", 0x4),
        ("jne", 0x5),
        ("jsgt", 0x6),
        ("jsge", 0x7),
        ("jlt", 0xa),
        ("jle", 0xb),
        ("jslt", 0xc),
        ("jsle", 0xd),
    ];

    JUMP_OPS
        .iter()
        .find(|entry| entry.0 == base)
        .map(|entry| entry.1)
}
539
540// ────────────────────────────────────────────────────────────
541//  Operand parsing
542// ────────────────────────────────────────────────────────────
543
/// Split an operand list on commas that sit outside square
/// brackets, trimming each piece.
///
/// Memory operands such as `[r5 + 0x10]` stay in one piece
/// because bracket depth is tracked. An empty input gives
/// an empty list; otherwise there is always one more piece
/// than there are top-level commas, so a trailing comma
/// produces a final empty piece.
fn split_operands(rest: &str) -> Vec<&str> {
    let mut pieces: Vec<&str> = Vec::new();
    if rest.is_empty() {
        return pieces;
    }

    let mut bracket_depth: i32 = 0;
    let mut piece_start = 0usize;
    // `[`, `]` and `,` are single-byte characters, so the
    // offsets from `char_indices` are safe slice bounds.
    for (at, ch) in rest.char_indices() {
        if ch == '[' {
            bracket_depth += 1;
        } else if ch == ']' {
            bracket_depth -= 1;
        } else if ch == ',' && bracket_depth == 0 {
            pieces.push(rest[piece_start..at].trim());
            piece_start = at + 1;
        }
    }
    pieces.push(rest[piece_start..].trim());
    pieces
}
570
571fn arity(operands: &[&str], expected: usize, mnemonic: &str) -> Result<(), AssembleError> {
572    if operands.len() == expected {
573        Ok(())
574    } else {
575        Err(AssembleError::WrongArity {
576            mnemonic: mnemonic.into(),
577            expected,
578            got: operands.len(),
579        })
580    }
581}
582
583fn parse_reg(text: &str) -> Result<u8, AssembleError> {
584    let t = text.trim();
585    let rest = t
586        .strip_prefix('r')
587        .ok_or_else(|| AssembleError::BadOperand(t.into(), "register"))?;
588    let n: u32 = rest
589        .parse()
590        .map_err(|_| AssembleError::BadOperand(t.into(), "register number"))?;
591    if n > 10 {
592        return Err(AssembleError::BadRegister(n));
593    }
594    #[allow(clippy::cast_possible_truncation)]
595    Ok(n as u8)
596}
597
598/// Parse a u64 immediate as printed by `format_insn` —
599/// `format!("0x{:x}", imm as u32)` for ALU/ldx/stx, or
600/// `format!("0x{:x}", imm64)` for lddw. Accepts both `0x`
601/// prefix and plain decimal.
602fn parse_uint(text: &str, ctx: &'static str) -> Result<u64, AssembleError> {
603    let t = text.trim();
604    if let Some(hex) = t.strip_prefix("0x") {
605        return u64::from_str_radix(hex, 16).map_err(|_| AssembleError::BadOperand(t.into(), ctx));
606    }
607    t.parse::<u64>()
608        .map_err(|_| AssembleError::BadOperand(t.into(), ctx))
609}
610
611/// Parse a u32 immediate (or a sign-extendable u64 that
612/// fits) into the i32 the BPF imm field carries. Matches
613/// `format!("0x{:x}", imm as u32)` so the round-trip is
614/// exact for any 32-bit pattern.
615fn parse_int(text: &str, ctx: &'static str) -> Result<i32, AssembleError> {
616    let v = parse_uint(text, ctx)?;
617    if v > u64::from(u32::MAX) {
618        return Err(AssembleError::ImmediateOverflow { value: v, bits: 32 });
619    }
620    #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
621    Ok(v as u32 as i32)
622}
623
624fn parse_alu_rhs(text: &str) -> Result<(bool, u8, i32), AssembleError> {
625    let t = text.trim();
626    if t.starts_with('r') {
627        let r = parse_reg(t)?;
628        return Ok((true, r, 0));
629    }
630    let imm = parse_int(t, "alu rhs")?;
631    Ok((false, 0, imm))
632}
633
634/// Parse `format_offset` shapes: `[rS]`, `[rS + 0xN]`,
635/// `[rS - 0xN]`. Returns `(src, offset_i16)`.
636fn parse_mem(text: &str) -> Result<(u8, i16), AssembleError> {
637    let t = text.trim();
638    let inner = t
639        .strip_prefix('[')
640        .and_then(|s| s.strip_suffix(']'))
641        .ok_or_else(|| AssembleError::BadOperand(t.into(), "memory operand"))?
642        .trim();
643    // Split off a trailing " + 0x…" or " - 0x…".
644    if let Some(idx) = inner.rfind(" + ") {
645        let reg = parse_reg(inner[..idx].trim())?;
646        let off = parse_offset_value(&inner[idx + 3..])?;
647        let off_i16 = i16::try_from(off).map_err(|_| AssembleError::OffsetOverflow(off))?;
648        return Ok((reg, off_i16));
649    }
650    if let Some(idx) = inner.rfind(" - ") {
651        let reg = parse_reg(inner[..idx].trim())?;
652        let off = parse_offset_value(&inner[idx + 3..])?;
653        let neg = -off;
654        let off_i16 = i16::try_from(neg).map_err(|_| AssembleError::OffsetOverflow(neg))?;
655        return Ok((reg, off_i16));
656    }
657    Ok((parse_reg(inner)?, 0))
658}
659
660fn parse_offset_value(text: &str) -> Result<i64, AssembleError> {
661    let t = text.trim();
662    if let Some(hex) = t.strip_prefix("0x") {
663        let v = u64::from_str_radix(hex, 16)
664            .map_err(|_| AssembleError::BadOperand(t.into(), "offset"))?;
665        #[allow(clippy::cast_possible_wrap)]
666        return Ok(v as i64);
667    }
668    t.parse::<i64>()
669        .map_err(|_| AssembleError::BadOperand(t.into(), "offset"))
670}
671
672/// Parse `format_branch_offset` shapes: `+0xN` / `-0xN`.
673fn parse_branch_offset(text: &str) -> Result<i16, AssembleError> {
674    let t = text.trim();
675    let (sign, rest) = if let Some(r) = t.strip_prefix('+') {
676        (1i64, r)
677    } else if let Some(r) = t.strip_prefix('-') {
678        (-1i64, r)
679    } else {
680        return Err(AssembleError::BadOperand(t.into(), "branch offset"));
681    };
682    let v = if let Some(hex) = rest.strip_prefix("0x") {
683        u64::from_str_radix(hex, 16)
684            .map_err(|_| AssembleError::BadOperand(t.into(), "branch offset"))?
685    } else {
686        rest.parse::<u64>()
687            .map_err(|_| AssembleError::BadOperand(t.into(), "branch offset"))?
688    };
689    #[allow(clippy::cast_possible_wrap)]
690    let signed = sign * (v as i64);
691    i16::try_from(signed).map_err(|_| AssembleError::OffsetOverflow(signed))
692}
693
694/// Convert a symbolic BPF @asm text — the form
695/// `crates/ud-translate/src/decompile/bpf.rs` produces
696/// after applying `label_<hex>` and `sub_<hex>` rewrites —
697/// into the numeric form [`assemble_bpf`] accepts.
698///
699/// The rewrites both encode their target address into the
700/// name (`label_4ab28` ↔ address 0x4ab28, same for
701/// `sub_<hex>`). Recovering the address is therefore as
702/// simple as parsing the hex suffix; no map lookup needed.
703/// `insn_addr` is the address of the @asm being assembled
704/// — branch offsets and internal-call imms are slot-relative
705/// to the *next* instruction (insn_addr + 8).
706///
707/// When the input has no symbolic refs, the output is the
708/// input unchanged. Returns `None` when a symbolic name
709/// doesn't parse to a hex address — the caller treats that
710/// the same as an assembler error and keeps the bytes
711/// pinned.
712#[must_use]
713pub fn desymbolize_bpf_text(text: &str, insn_addr: u64, opcode_hint: Option<u8>) -> Option<String> {
714    // Intra-program calls — EXPLICIT `call_local sub_<hex>`
715    // form. The renderer emits this when the original byte
716    // encoding is the Linux BPF-to-BPF opcode `0x8d`. The
717    // mnemonic itself tells us which assembler path to take
718    // — opcode_hint is moot here.
719    if let Some(rest) = text.strip_prefix("call_local sub_") {
720        let target = u64::from_str_radix(rest.trim(), 16).ok()?;
721        let slots = slot_offset_from(target, insn_addr)?;
722        return Some(format!("call_local {slots}"));
723    }
724
725    // Intra-program calls — Solana sBPF default form. The
726    // renderer emits plain `call sub_<hex>` for the
727    // `0x85 src=1` encoding.
728    if let Some(rest) = text.strip_prefix("call sub_") {
729        let target = u64::from_str_radix(rest.trim(), 16).ok()?;
730        let slots = slot_offset_from(target, insn_addr)?;
731        let mnemonic = match opcode_hint {
732            Some(0x8d) => "call_local",
733            _ => "call_internal",
734        };
735        return Some(format!("{mnemonic} {slots}"));
736    }
737
738    // Syscall placeholders — `call <name>` (or
739    // `call_local <name>`) where the name isn't a
740    // `sub_<hex>` placeholder. Solana SBF programs emit
741    // these with the imm field set to `-1` (0xffffffff) as
742    // a relocation marker the loader patches at load time.
743    // We rewrite to `call_internal -1` / `call_local -1`;
744    // the byte-drop pass's match-test catches sites whose
745    // original imm wasn't `-1` and keeps those pinned.
746    if let Some(name) = text.strip_prefix("call ") {
747        if is_symbolic_callee(name) {
748            return Some("call_internal -1".to_string());
749        }
750    }
751    if let Some(name) = text.strip_prefix("call_local ") {
752        if is_symbolic_callee(name) {
753            return Some("call_local -1".to_string());
754        }
755    }
756
757    // Conditional jumps + `ja`: replace a trailing
758    // `, label_<hex>` (or `, label_<hex>` after the third
759    // operand for `jXX`) with the slot-relative `+0xN` /
760    // `-0xN` shape `assemble_bpf` parses.
761    if let Some(label_at) = text.find(", label_").or_else(|| text.find(" label_")) {
762        // Two shapes:
763        //   `jXX rA, rhs, label_<hex>`  — JmpCond, 3 operands
764        //   `ja label_<hex>`            — 1 operand
765        let prefix = &text[..label_at];
766        let suffix_offset = label_at
767            + match text.as_bytes().get(label_at) {
768                Some(b',') => 2, // ", "
769                _ => 1,          // " "
770            };
771        let label_name = &text[suffix_offset..];
772        let hex = label_name.strip_prefix("label_")?;
773        let target = u64::from_str_radix(hex.trim(), 16).ok()?;
774        let next_slot = insn_addr.wrapping_add(INSN_SIZE as u64);
775        #[allow(clippy::cast_possible_wrap)]
776        let delta = (target as i64).wrapping_sub(next_slot as i64);
777        if delta % (INSN_SIZE as i64) != 0 {
778            return None;
779        }
780        let slot_offset = delta / (INSN_SIZE as i64);
781        let offset_text = if slot_offset >= 0 {
782            format!("+0x{slot_offset:x}")
783        } else {
784            format!("-0x{:x}", -slot_offset)
785        };
786        let separator = if text.as_bytes().get(label_at) == Some(&b',') {
787            ", "
788        } else {
789            " "
790        };
791        return Some(format!("{prefix}{separator}{offset_text}"));
792    }
793
794    // String-resolved `lddw rN, "literal" @0x<imm>` — the
795    // renderer rewrites the imm64 to its rodata literal for
796    // readability and appends `@0x<imm>` so the address is
797    // still recoverable. We strip the string and substitute
798    // the numeric form the assembler accepts.
799    if let Some(rest) = text.strip_prefix("lddw ") {
800        if let Some(at) = rest.find(" @0x") {
801            let head_with_reg = &rest[..at]; // "rN, \"string\""
802            let imm_text = &rest[at + 4..]; // "<hex>"
803                                            // The bit before the comma is the register
804                                            // (it carries no rewritable syntax). Keep that
805                                            // and drop the string literal.
806            if let Some(comma) = head_with_reg.find(',') {
807                let reg = head_with_reg[..comma].trim();
808                return Some(format!("lddw {reg}, 0x{}", imm_text.trim()));
809            }
810        }
811    }
812
813    // Stack-slot rewrites — the BPF renderer collapses
814    // `[r10 - 0xN]` to `[local_<N>]` (local var) and
815    // `[r10 + 0xN]` to `[arg_<N>]` (incoming arg slot).
816    // Reverse those so `assemble_bpf` can parse the
817    // resulting `[r10 ± 0xN]` form.
818    let mut s = text.to_string();
819    let mut changed = false;
820    if s.contains("[local_") {
821        s = rewrite_stack_slot(&s, "[local_", "[r10 - 0x");
822        changed = true;
823    }
824    if s.contains("[arg_") {
825        s = rewrite_stack_slot(&s, "[arg_", "[r10 + 0x");
826        changed = true;
827    }
828    if changed {
829        return Some(s);
830    }
831
832    // Nothing to de-symbolize — return as-is so the caller
833    // can still attempt assembly on the pure-form path.
834    Some(text.to_string())
835}
836
/// Replace each `prefix<body>]` span in `text` with
/// `replacement<body>]` — e.g. `[local_40]` becomes
/// `[r10 - 0x40]` when called with `"[local_"` and
/// `"[r10 - 0x"`. The `<body>` between the prefix and the
/// next `]` is copied through untouched.
///
/// If an occurrence of `prefix` has no closing `]`, rewriting
/// stops there and the rest of `text` (prefix included) is
/// appended unchanged.
fn rewrite_stack_slot(text: &str, prefix: &str, replacement: &str) -> String {
    let mut rewritten = String::with_capacity(text.len());
    let mut tail = text;
    while let Some((before, after_prefix)) = tail.split_once(prefix) {
        // Everything ahead of the slot is copied as-is.
        rewritten.push_str(before);
        match after_prefix.split_once(']') {
            Some((body, remainder)) => {
                rewritten.push_str(replacement);
                rewritten.push_str(body);
                rewritten.push(']');
                tail = remainder;
            }
            None => {
                // Unterminated slot: emit it verbatim and
                // stop, leaving the malformed text for the
                // assemble step to reject.
                rewritten.push_str(prefix);
                rewritten.push_str(after_prefix);
                return rewritten;
            }
        }
    }
    rewritten.push_str(tail);
    rewritten
}
865
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{decode, format_insn, BpfVariant};

    /// Round-trip property: for every decodable instruction
    /// the assembler reproduces the same bytes from the
    /// disassembled text.
    ///
    /// Decodes `bytes`, formats each instruction, re-assembles
    /// the text and compares the result with the matching
    /// `INSN_SIZE`-byte slot of the input. Finally asserts
    /// that the decoded instructions account for every input
    /// byte, so a decoder that returns too few instructions
    /// cannot make the property pass vacuously.
    fn roundtrip(bytes: &[u8], variant: BpfVariant) {
        let insns = decode(bytes, 0, variant).expect("decode");
        let mut cursor = 0usize;
        for insn in &insns {
            let text = format_insn(insn, variant);
            let asm =
                assemble_bpf(&text).unwrap_or_else(|e| panic!("assemble failed: {text:?} → {e:?}"));
            assert_eq!(
                asm.as_slice(),
                &bytes[cursor..cursor + INSN_SIZE],
                "mismatch on {text:?}: assembled {asm:?}, original {:?}",
                &bytes[cursor..cursor + INSN_SIZE]
            );
            cursor += INSN_SIZE;
        }
        // Every slot of the fixture must have been checked.
        assert_eq!(
            cursor,
            bytes.len(),
            "decode covered {cursor} of {} input bytes",
            bytes.len()
        );
    }

    /// `mov` / `add` with immediate and register sources,
    /// in both the 64-bit and 32-bit classes.
    #[test]
    fn alu_immediate_and_register() {
        roundtrip(
            &[
                0xb7, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, // mov64 r1, 42
                0xbf, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov64 r1, r2
                0x07, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, // add64 r1, 0x10
                0x0f, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // add64 r1, r2
                0xb4, 0x03, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, // mov32 r3, 0xffffffff
                0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit
            ],
            BpfVariant::Linux,
        );
    }

    /// `ldx*`, `stx*` and `st*` forms with zero, positive
    /// and negative offsets.
    #[test]
    fn loads_and_stores_all_widths() {
        roundtrip(
            &[
                0x79, 0xa1, 0xf8, 0xff, 0x00, 0x00, 0x00, 0x00, // ldxdw r1, [r10 - 8]
                0x71, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ldxb r2, [r1]
                0x69, 0x13, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, // ldxh r3, [r1 + 2]
                0x61, 0x14, 0xfc, 0xff, 0x00, 0x00, 0x00, 0x00, // ldxw r4, [r1 - 4]
                0x7b, 0x1a, 0xe0, 0xff, 0x00, 0x00, 0x00, 0x00, // stxdw [r10 - 32], r1
                0x73, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // stxb [r1], r2
                0x62, 0x0a, 0xf0, 0xff, 0x42, 0x00, 0x00, 0x00, // stw [r10 - 16], 0x42
                0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit
            ],
            BpfVariant::Linux,
        );
    }

    /// Conditional jumps (imm and reg source), `ja`, and a
    /// `call` with a numeric immediate.
    #[test]
    fn branches_and_calls() {
        // jne (reg src) uses opcode 0x5d (op=jne, src=reg, class=JMP).
        // The src nibble in byte 1 carries the source register.
        roundtrip(
            &[
                0x15, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, // jeq r1, 0, +2
                0x5d, 0x21, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00, // jne r1, r2, -5
                0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // ja +1
                0x85, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00,
                0x00, // call 0x7 (src=0, syscall-style)
                0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit
            ],
            BpfVariant::Linux,
        );
    }

    /// When the disassembler can't infer the mnemonic
    /// from an opcode (e.g. an undefined op-nibble in a
    /// known class), the text comes out as `"<jcc?>"` /
    /// `"<alu?>"`. The assembler returns `Err(UnknownMnemonic)`
    /// for those — the decompile-time byte-drop pass treats
    /// the Err the same way it treats a parse failure: it
    /// keeps the pinned bytes.
    #[test]
    fn unrecognised_mnemonic_text_returns_err() {
        let err = assemble_bpf("<jcc?> r1, r2, +0x1").unwrap_err();
        assert!(matches!(err, AssembleError::UnknownMnemonic(_)));
    }

    /// `call sub_<hex>` desymbolises to `call_internal <slot_count>`,
    /// which assembles to opcode 0x85 with src=1 and the
    /// computed relative slot count in imm — matching the
    /// bytes Solana BPF emits for an intra-program call.
    #[test]
    fn desymbolise_internal_call_round_trips() {
        // Forward call from 0x1000 to 0x2000 — 0x1000 / 8 =
        // 0x200 slots forward; but the offset is computed
        // from the next slot (0x1008), so 0xff8 / 8 = 0x1ff slots.
        let text = "call sub_2000";
        let desym = desymbolize_bpf_text(text, 0x1000, None).unwrap();
        assert_eq!(desym, "call_internal 511"); // 0xff8 / 8 = 511
        let bytes = assemble_bpf(&desym).unwrap();
        assert_eq!(bytes[0], 0x85); // call opcode
        assert_eq!(bytes[1], 0x10); // src=1, dst=0
        let imm = i32::from_le_bytes(bytes[4..8].try_into().unwrap());
        assert_eq!(imm, 511);
    }

    /// A `call sub_<hex>` whose target lies before the call
    /// site desymbolises to a negative slot count.
    #[test]
    fn desymbolise_backward_call() {
        // Backward call: from 0x2000 to 0x1000.
        // Next slot = 0x2008, target = 0x1000, delta = -0x1008 / 8 = -513.
        let text = "call sub_1000";
        let desym = desymbolize_bpf_text(text, 0x2000, None).unwrap();
        assert_eq!(desym, "call_internal -513");
        let bytes = assemble_bpf(&desym).unwrap();
        let imm = i32::from_le_bytes(bytes[4..8].try_into().unwrap());
        assert_eq!(imm, -513);
    }

    /// A forward `label_<hex>` on a conditional jump becomes
    /// a positive slot-relative offset.
    #[test]
    fn desymbolise_jcc_label_round_trips() {
        // jeq r1, 0x0, label_1010 at insn_addr 0x1000:
        //   next_slot = 0x1008, target = 0x1010, delta = 8,
        //   slot_offset = +1.
        let text = "jeq r1, 0x0, label_1010";
        let desym = desymbolize_bpf_text(text, 0x1000, None).unwrap();
        assert_eq!(desym, "jeq r1, 0x0, +0x1");
        let bytes = assemble_bpf(&desym).unwrap();
        assert_eq!(bytes[0], 0x15); // jeq imm-src JMP
        let off = i16::from_le_bytes(bytes[2..4].try_into().unwrap());
        assert_eq!(off, 1);
    }

    /// A backward `label_<hex>` on a conditional jump becomes
    /// a negative slot-relative offset.
    #[test]
    fn desymbolise_backward_jcc() {
        // jgt r2, r3, label_1000 at insn_addr 0x1020:
        //   next_slot = 0x1028, target = 0x1000, delta = -0x28,
        //   slot_offset = -5.
        let text = "jgt r2, r3, label_1000";
        let desym = desymbolize_bpf_text(text, 0x1020, None).unwrap();
        assert_eq!(desym, "jgt r2, r3, -0x5");
        let bytes = assemble_bpf(&desym).unwrap();
        let off = i16::from_le_bytes(bytes[2..4].try_into().unwrap());
        assert_eq!(off, -5);
    }

    /// `ja label_<hex>` targeting the very next slot
    /// desymbolises to a zero offset.
    #[test]
    fn desymbolise_ja_label() {
        // ja label_1008 at insn_addr 0x1000:
        //   next_slot = 0x1008, target = 0x1008, delta = 0.
        let text = "ja label_1008";
        let desym = desymbolize_bpf_text(text, 0x1000, None).unwrap();
        assert_eq!(desym, "ja +0x0");
        let bytes = assemble_bpf(&desym).unwrap();
        assert_eq!(bytes[0], 0x05);
        let off = i16::from_le_bytes(bytes[2..4].try_into().unwrap());
        assert_eq!(off, 0);
    }

    /// Text with nothing symbolic in it comes back unchanged.
    #[test]
    fn desymbolise_non_symbolic_text_passes_through() {
        let text = "ldxdw r0, [r5 - 0xff8]";
        assert_eq!(desymbolize_bpf_text(text, 0x1000, None).unwrap(), text);
    }

    /// A named (non-`sub_<hex>`) callee desymbolises to the
    /// `-1` relocation placeholder.
    #[test]
    fn desymbolise_syscall_call_yields_relocation_placeholder() {
        // Solana SBF syscalls (and the `abort` stub) carry
        // a literal `-1` imm at decompile time — the loader
        // patches it at load. We rewrite `call <name>`
        // (any non-`sub_<hex>` callee) to `call_internal -1`
        // so the byte-drop pass can recover the encoding
        // when the original bytes used that placeholder.
        let dsym = desymbolize_bpf_text("call sol_log_", 0x1000, None).unwrap();
        assert_eq!(dsym, "call_internal -1");
        let bytes = assemble_bpf(&dsym).unwrap();
        assert_eq!(bytes, vec![0x85, 0x10, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff]);
    }

    /// A string-resolved `lddw` with an `@0x<imm>` suffix
    /// desymbolises to the numeric `lddw rN, 0x<imm>` form.
    #[test]
    fn desymbolise_lddw_with_string_and_addr_annotation() {
        // Renderer-side form: a string-resolved lddw
        // carries the rodata address as an `@0xN` suffix so
        // the lower path can reproduce the bytes from text
        // alone. The desymbolizer drops the string and
        // forwards the address to the assembler.
        let text = r#"lddw r3, "src/extension/mod.rs" @0x52b20"#;
        let dsym = desymbolize_bpf_text(text, 0x1000, None).unwrap();
        assert_eq!(dsym, "lddw r3, 0x52b20");
        let bytes = assemble_bpf(&dsym).unwrap();
        assert_eq!(bytes, vec![0x18, 0x03, 0x00, 0x00, 0x20, 0x2b, 0x05, 0x00]);
    }

    /// `call_local sub_<hex>` desymbolises to
    /// `call_local <slot_count>` and assembles to opcode 0x8d.
    #[test]
    fn desymbolise_call_local_explicit_form() {
        // `call_local sub_<hex>` in the .ud text — the
        // explicit Linux BPF-to-BPF form. Yields the 0x8d
        // opcode regardless of opcode_hint.
        let dsym = desymbolize_bpf_text("call_local sub_1010", 0x1000, None).unwrap();
        assert_eq!(dsym, "call_local 1");
        let bytes = assemble_bpf(&dsym).unwrap();
        assert_eq!(bytes[0], 0x8d);
        let imm = i32::from_le_bytes(bytes[4..8].try_into().unwrap());
        assert_eq!(imm, 1);
    }

    /// Both slots of an `lddw` (head + continuation)
    /// round-trip.
    #[test]
    fn lddw_pair() {
        roundtrip(
            &[
                0x18, 0x01, 0x00, 0x00, 0xbe, 0xba, 0xfe, 0xca, // lddw r1, 0x...cafebabe
                0x00, 0x00, 0x00, 0x00, 0xef, 0xbe, 0xad, 0xde, // continuation 0xdeadbeef
                0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit
            ],
            BpfVariant::Linux,
        );
    }

    /// `callx rD` followed by `exit` round-trips under the
    /// `Sbfv1` variant.
    #[test]
    fn callx_and_exit() {
        roundtrip(
            &[
                0x8d, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // callx r1
                0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit
            ],
            BpfVariant::Sbfv1,
        );
    }

    /// `le16/le32/le64` and `be16/be64` byte-swap forms
    /// round-trip.
    #[test]
    fn endian_ops() {
        roundtrip(
            &[
                0xd4, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, // le16 r1
                0xd4, 0x02, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, // le32 r2
                0xd4, 0x03, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, // le64 r3
                0xdc, 0x04, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, // be16 r4
                0xdc, 0x05, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, // be64 r5
                0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit
            ],
            BpfVariant::Linux,
        );
    }

    /// The `<bpf 0x…>` raw form assembles back to the
    /// little-endian bytes of its u64 payload.
    #[test]
    fn raw_bpf_form_passes_through() {
        // The `<bpf 0xNNN…>` fallback exists for forward-
        // compat with future decoder paths that may emit it
        // for truly unknown opcodes. Today no decoder path
        // produces it (every byte lands in one of the
        // class branches of `format_insn`), but the
        // assembler still accepts it so the round-trip
        // contract holds the day a new opcode arrives.
        let bytes = [0xee, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07];
        let text = format!("<bpf 0x{:016x}>", u64::from_le_bytes(bytes));
        let asm = assemble_bpf(&text).unwrap();
        assert_eq!(asm.as_slice(), &bytes);
    }

    /// Symbolic text handed straight to `assemble_bpf`
    /// (without desymbolising first) is rejected with
    /// `BadOperand`.
    #[test]
    fn symbolic_text_not_recognised() {
        // The translation layer rewrites "call 0x..." into
        // "call sub_X" — that text is symbolic; the
        // assembler returns Err and the byte-drop pass
        // keeps the pinned bytes.
        let r = assemble_bpf("call sub_4ab28");
        assert!(matches!(r, Err(AssembleError::BadOperand(_, _))));
    }
}