Skip to main content

sbpf_common/
instruction.rs

1use {
2    crate::{
3        errors::SBPFError,
4        inst_handler::{OPCODE_TO_HANDLER, OPCODE_TO_TYPE},
5        inst_param::{Number, Register},
6        opcode::{Opcode, OperationType},
7        syscalls::REGISTERED_SYSCALLS,
8    },
9    core::ops::Range,
10    either::Either,
11    serde::{Deserialize, Serialize},
12};
13
/// Selects the textual syntax produced by [`Instruction::to_asm`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum AsmFormat {
    /// Mnemonic-style output (`add64 r1, 0x0`); this is the default format.
    #[default]
    Default,
    /// LLVM-style expression output (`r1 += 0x0`).
    Llvm,
}
20
21#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
22pub struct Instruction {
23    pub opcode: Opcode,
24    pub dst: Option<Register>,
25    pub src: Option<Register>,
26    pub off: Option<Either<String, i16>>,
27    pub imm: Option<Either<String, Number>>,
28    pub span: Range<usize>,
29}
30
31impl Instruction {
32    pub fn get_size(&self) -> u64 {
33        match self.opcode {
34            Opcode::Lddw => 16,
35            _ => 8,
36        }
37    }
38
39    fn get_opcode_type(&self) -> OperationType {
40        *OPCODE_TO_TYPE.get(&self.opcode).unwrap()
41    }
42
43    pub fn is_jump(&self) -> bool {
44        matches!(
45            self.get_opcode_type(),
46            OperationType::Jump | OperationType::JumpImmediate | OperationType::JumpRegister
47        )
48    }
49
50    /// Checks if the instruction is a syscall.
51    /// This should be used only when the call label hasn't been resolved to -1.
52    pub fn is_syscall(&self) -> bool {
53        if self.opcode == Opcode::Call
54            && let Some(Either::Left(identifier)) = &self.imm
55        {
56            return REGISTERED_SYSCALLS.contains(&identifier.as_str());
57        }
58        false
59    }
60
61    // only used for be/le
62    pub fn op_imm_bits(&self) -> Result<String, SBPFError> {
63        match &self.imm {
64            Some(Either::Right(Number::Int(imm))) => match *imm {
65                16 => Ok(format!("{}16", self.opcode)),
66                32 => Ok(format!("{}32", self.opcode)),
67                64 => Ok(format!("{}64", self.opcode)),
68                _ => Err(SBPFError::BytecodeError {
69                    error: format!(
70                        "Invalid immediate value: {:?} for opcode: {:?}",
71                        self.imm, self.opcode
72                    ),
73                    span: self.span.clone(),
74                    custom_label: None,
75                }),
76            },
77            _ => Err(SBPFError::BytecodeError {
78                error: format!("Expected immediate value for opcode: {:?}", self.opcode),
79                span: self.span.clone(),
80                custom_label: None,
81            }),
82        }
83    }
84
85    pub fn from_bytes(bytes: &[u8]) -> Result<Self, SBPFError> {
86        let opcode: Opcode = bytes[0].try_into()?;
87        if let Some(handler) = OPCODE_TO_HANDLER.get(&opcode) {
88            (handler.decode)(bytes)
89        } else {
90            Err(SBPFError::BytecodeError {
91                error: format!("no decode handler for opcode {}", opcode),
92                span: 0..1,
93                custom_label: Some("Invalid opcode".to_string()),
94            })
95        }
96    }
97
98    pub fn from_bytes_sbpf_v2(bytes: &[u8]) -> Result<Self, SBPFError> {
99        // Preprocess the opcode byte for SBPF v2 (e_flags == 0x02)
100        let mut processed_bytes = bytes.to_vec();
101
102        match processed_bytes[0] {
103            // New opcodes in v2 that map to existing instructions
104            0x8C => processed_bytes[0] = 0x61, // v2: 0x8C -> ldxw dst, [src + off]
105            0x8F => processed_bytes[0] = 0x63, // v2: 0x8F -> stxw [dst + off], src
106            // Repurposed opcodes in v2
107            0x2C => processed_bytes[0] = 0x71, // v2: mul32 dst, src -> ldxb dst, [src + off]
108            0x3C => processed_bytes[0] = 0x69, // v2: div32 dst, src -> ldxh dst, [src + off]
109            0x9C => processed_bytes[0] = 0x79, // v2: mod32 dst, src -> ldxdw dst, [src + off]
110            0x27 => processed_bytes[0] = 0x72, // v2: mul64 dst, imm -> stb [dst + off], imm
111            0x2F => processed_bytes[0] = 0x73, // v2: mul64 dst, src -> stxb [dst + off], src
112            0x37 => processed_bytes[0] = 0x6A, // v2: div64 dst, imm -> sth [dst + off], imm
113            0x3F => processed_bytes[0] = 0x6B, // v2: div64 dst, src -> stxh [dst + off], src
114            0x87 => processed_bytes[0] = 0x62, // v2: neg64 dst -> stw [dst + off], imm
115            0x97 => processed_bytes[0] = 0x7A, // v2: mod64 dst, imm -> stdw [dst + off], imm
116            0x9F => processed_bytes[0] = 0x7B, // v2: mod64 dst, src -> stxdw [dst + off], src
117            // Revert Lddw
118            0x21 => {
119                if let Some(lddw_2) = processed_bytes.get(8)
120                    && lddw_2 == &0xf7
121                {
122                    processed_bytes[0] = 0x18;
123                    processed_bytes[8..12].clone_from_slice(&[0u8; 4]);
124                }
125            }
126            // Move callx target from src to dst
127            0x8D => processed_bytes[1] >>= 4,
128            // All other opcodes remain unchanged
129            _ => (),
130        }
131
132        Self::from_bytes(&processed_bytes)
133    }
134
135    pub fn to_bytes(&self) -> Result<Vec<u8>, SBPFError> {
136        let dst_val = self.dst.as_ref().map(|r| r.n).unwrap_or(0);
137        let src_val = self.src.as_ref().map(|r| r.n).unwrap_or(0);
138        let off_val = match &self.off {
139            Some(Either::Left(ident)) => {
140                unreachable!("Identifier '{}' should have been resolved earlier", ident)
141            }
142            Some(Either::Right(off)) => *off,
143            None => 0,
144        };
145        let imm_val = match &self.imm {
146            Some(Either::Left(ident)) => {
147                unreachable!("Identifier '{}' should have been resolved earlier", ident)
148            }
149            Some(Either::Right(Number::Int(imm))) | Some(Either::Right(Number::Addr(imm))) => *imm,
150            None => 0,
151        };
152        // fix callx encoding in sbpf
153        let (dst_val, imm_val) = match self.opcode {
154            Opcode::Callx => (0, dst_val as i64), // callx: dst register encoded in imm
155            _ => (dst_val, imm_val),
156        };
157
158        let mut b = vec![self.opcode.into(), src_val << 4 | dst_val];
159        b.extend_from_slice(&off_val.to_le_bytes());
160        b.extend_from_slice(&(imm_val as i32).to_le_bytes());
161        if self.opcode == Opcode::Lddw {
162            b.extend_from_slice(&[0; 4]);
163            b.extend_from_slice(&((imm_val >> 32) as i32).to_le_bytes());
164        }
165        Ok(b)
166    }
167
168    pub fn to_asm(&self, format: AsmFormat) -> Result<String, SBPFError> {
169        match format {
170            AsmFormat::Default => self.to_default_asm(),
171            AsmFormat::Llvm => self.to_llvm_asm(),
172        }
173    }
174
175    fn to_default_asm(&self) -> Result<String, SBPFError> {
176        if let Some(handler) = OPCODE_TO_HANDLER.get(&self.opcode) {
177            match (handler.validate)(self) {
178                Ok(()) => {
179                    let mut asm = if self.opcode == Opcode::Le || self.opcode == Opcode::Be {
180                        self.op_imm_bits()?
181                    } else {
182                        format!("{}", self.opcode)
183                    };
184                    let mut param = vec![];
185
186                    fn fmt_mem_off(r: &Register, off: &Either<String, i16>) -> String {
187                        format!("[r{}{}]", r.n, fmt_off(off))
188                    }
189
190                    if self.get_opcode_type() == OperationType::LoadMemory {
191                        param.push(format!("r{}", self.dst.as_ref().unwrap().n));
192                        param.push(fmt_mem_off(
193                            self.src.as_ref().unwrap(),
194                            self.off.as_ref().unwrap(),
195                        ));
196                    } else if self.get_opcode_type() == OperationType::StoreImmediate {
197                        param.push(fmt_mem_off(
198                            self.dst.as_ref().unwrap(),
199                            self.off.as_ref().unwrap(),
200                        ));
201                        param.push(fmt_imm(self.imm.as_ref().unwrap()));
202                    } else if self.get_opcode_type() == OperationType::StoreRegister {
203                        param.push(fmt_mem_off(
204                            self.dst.as_ref().unwrap(),
205                            self.off.as_ref().unwrap(),
206                        ));
207                        param.push(format!("r{}", self.src.as_ref().unwrap().n));
208                    } else {
209                        if let Some(dst) = &self.dst {
210                            param.push(format!("r{}", dst.n));
211                        }
212                        if let Some(src) = &self.src
213                            && self.opcode != Opcode::Call
214                        {
215                            param.push(format!("r{}", src.n));
216                        }
217                        if let Some(imm) = &self.imm
218                            && self.opcode != Opcode::Le
219                            && self.opcode != Opcode::Be
220                        {
221                            param.push(fmt_imm(imm));
222                        }
223                        if let Some(off) = &self.off {
224                            param.push(fmt_off(off));
225                        }
226                    }
227                    if !param.is_empty() {
228                        asm.push(' ');
229                        asm.push_str(&param.join(", "));
230                    }
231                    Ok(asm)
232                }
233                Err(e) => Err(e),
234            }
235        } else {
236            Err(SBPFError::BytecodeError {
237                error: format!("no validate handler for opcode {}", self.opcode),
238                span: self.span.clone(),
239                custom_label: None,
240            })
241        }
242    }
243
244    fn to_llvm_asm(&self) -> Result<String, SBPFError> {
245        let op_type = self.get_opcode_type();
246
247        fn fmt_mem_off(off: &Either<String, i16>) -> String {
248            match off {
249                Either::Left(label) => label.clone(),
250                Either::Right(v) if *v < 0 => format!("- 0x{:x}", -(*v as i32)),
251                Either::Right(v) => format!("+ 0x{:x}", v),
252            }
253        }
254
255        match op_type {
256            OperationType::BinaryImmediate | OperationType::BinaryRegister => {
257                if self.opcode == Opcode::Le || self.opcode == Opcode::Be {
258                    let bits = self.op_imm_bits()?;
259                    let dst = self.dst.as_ref().unwrap().n;
260                    return Ok(format!("r{} = {} r{}", dst, bits, dst));
261                }
262                let op = self
263                    .opcode
264                    .to_operator()
265                    .ok_or_else(|| SBPFError::BytecodeError {
266                        error: format!("unsupported opcode in LLVM format: {}", self.opcode),
267                        span: self.span.clone(),
268                        custom_label: None,
269                    })?;
270                let prefix = if self.opcode.is_32bit() { "w" } else { "r" };
271                let dst = self.dst.as_ref().unwrap().n;
272                let rhs = if op_type == OperationType::BinaryRegister {
273                    format!("{}{}", prefix, self.src.as_ref().unwrap().n)
274                } else {
275                    fmt_imm(self.imm.as_ref().unwrap())
276                };
277                Ok(format!("{}{} {} {}", prefix, dst, op, rhs))
278            }
279            OperationType::Unary => {
280                let prefix = if self.opcode == Opcode::Neg32 {
281                    "w"
282                } else {
283                    "r"
284                };
285                let dst = self.dst.as_ref().unwrap().n;
286                Ok(format!("{}{} = -{}{}", prefix, dst, prefix, dst))
287            }
288            OperationType::LoadImmediate => {
289                let dst = self.dst.as_ref().unwrap().n;
290                let imm = fmt_imm(self.imm.as_ref().unwrap());
291                Ok(format!("r{} = {} ll", dst, imm))
292            }
293            OperationType::LoadMemory => {
294                let size = self.opcode.to_size().unwrap();
295                let dst_prefix = if self.opcode == Opcode::Ldxdw {
296                    "r"
297                } else {
298                    "w"
299                };
300                let dst = self.dst.as_ref().unwrap().n;
301                let src = self.src.as_ref().unwrap().n;
302                let off = fmt_mem_off(self.off.as_ref().unwrap());
303                Ok(format!(
304                    "{}{} = *({} *)(r{} {})",
305                    dst_prefix, dst, size, src, off
306                ))
307            }
308            OperationType::StoreImmediate => {
309                let size = self.opcode.to_size().unwrap();
310                let dst = self.dst.as_ref().unwrap().n;
311                let off = fmt_mem_off(self.off.as_ref().unwrap());
312                let imm = fmt_imm(self.imm.as_ref().unwrap());
313                Ok(format!("*({} *)(r{} {}) = {}", size, dst, off, imm))
314            }
315            OperationType::StoreRegister => {
316                let size = self.opcode.to_size().unwrap();
317                let dst = self.dst.as_ref().unwrap().n;
318                let off = fmt_mem_off(self.off.as_ref().unwrap());
319                let src_prefix = if self.opcode == Opcode::Stxdw {
320                    "r"
321                } else {
322                    "w"
323                };
324                let src = self.src.as_ref().unwrap().n;
325                Ok(format!(
326                    "*({} *)(r{} {}) = {}{}",
327                    size, dst, off, src_prefix, src
328                ))
329            }
330            OperationType::Jump => {
331                let off = fmt_off(self.off.as_ref().unwrap());
332                Ok(format!("goto {}", off))
333            }
334            OperationType::JumpImmediate => {
335                let dst = self.dst.as_ref().unwrap().n;
336                let op = self.opcode.to_operator().unwrap();
337                let imm = fmt_imm(self.imm.as_ref().unwrap());
338                let off = fmt_off(self.off.as_ref().unwrap());
339                Ok(format!("if r{} {} {} goto {}", dst, op, imm, off))
340            }
341            OperationType::JumpRegister => {
342                let dst = self.dst.as_ref().unwrap().n;
343                let op = self.opcode.to_operator().unwrap();
344                let src = self.src.as_ref().unwrap().n;
345                let off = fmt_off(self.off.as_ref().unwrap());
346                Ok(format!("if r{} {} r{} goto {}", dst, op, src, off))
347            }
348            OperationType::CallImmediate | OperationType::CallRegister | OperationType::Exit => {
349                self.to_default_asm()
350            }
351        }
352    }
353}
354
355fn fmt_off(off: &Either<String, i16>) -> String {
356    match off {
357        Either::Left(label) => label.clone(),
358        Either::Right(v) if *v < 0 => format!("-0x{:x}", -(*v as i32)),
359        Either::Right(v) => format!("+0x{:x}", v),
360    }
361}
362
363fn fmt_imm(imm: &Either<String, Number>) -> String {
364    match imm {
365        Either::Left(label) => label.clone(),
366        Either::Right(Number::Int(v)) | Either::Right(Number::Addr(v)) => {
367            if *v < 0 {
368                format!("-0x{:x}", -v)
369            } else {
370                format!("0x{:x}", v)
371            }
372        }
373    }
374}
375
376#[cfg(test)]
377mod test {
378    use {
379        crate::{
380            inst_param::{Number, Register},
381            instruction::{AsmFormat, Instruction},
382            opcode::Opcode,
383        },
384        either::Either,
385        hex_literal::hex,
386        syscall_map::murmur3_32,
387    };
388
389    #[test]
390    fn serialize_e2e() {
391        let b = hex!("9700000000000000");
392        let i = Instruction::from_bytes(&b).unwrap();
393        assert_eq!(i.to_bytes().unwrap(), &b);
394        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "mod64 r0, 0x0");
395        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r0 %= 0x0");
396    }
397
398    #[test]
399    fn serialize_e2e_lddw() {
400        let b = hex!("18010000000000000000000000000000");
401        let i = Instruction::from_bytes(&b).unwrap();
402        assert_eq!(i.to_bytes().unwrap(), &b);
403        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "lddw r1, 0x0");
404        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r1 = 0x0 ll");
405    }
406
407    #[test]
408    fn serialize_e2e_add64_imm() {
409        let b = hex!("0701000000000000");
410        let i = Instruction::from_bytes(&b).unwrap();
411        assert_eq!(i.to_bytes().unwrap(), &b);
412        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "add64 r1, 0x0");
413        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r1 += 0x0");
414    }
415
416    #[test]
417    fn serialize_e2e_add64_reg() {
418        let b = hex!("0f12000000000000");
419        let i = Instruction::from_bytes(&b).unwrap();
420        assert_eq!(i.to_bytes().unwrap(), &b);
421        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "add64 r2, r1");
422        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r2 += r1");
423    }
424
425    #[test]
426    fn serialize_e2e_ja() {
427        let b = hex!("05000a0000000000");
428        let i = Instruction::from_bytes(&b).unwrap();
429        assert_eq!(i.to_bytes().unwrap(), &b);
430        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "ja +0xa");
431        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "goto +0xa");
432    }
433
434    #[test]
435    fn serialize_e2e_jeq_imm() {
436        let b = hex!("15030a0001000000");
437        let i = Instruction::from_bytes(&b).unwrap();
438        assert_eq!(i.to_bytes().unwrap(), &b);
439        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "jeq r3, 0x1, +0xa");
440        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "if r3 == 0x1 goto +0xa");
441    }
442
443    #[test]
444    fn serialize_e2e_jeq_reg() {
445        let b = hex!("1d210a0000000000");
446        let i = Instruction::from_bytes(&b).unwrap();
447        assert_eq!(i.to_bytes().unwrap(), &b);
448        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "jeq r1, r2, +0xa");
449        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "if r1 == r2 goto +0xa");
450    }
451
452    #[test]
453    fn serialize_e2e_ldxw() {
454        let b = hex!("6112000000000000");
455        let i = Instruction::from_bytes(&b).unwrap();
456        assert_eq!(i.to_bytes().unwrap(), &b);
457        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "ldxw r2, [r1+0x0]");
458        assert_eq!(
459            i.to_asm(AsmFormat::Llvm).unwrap(),
460            "w2 = *(u32 *)(r1 + 0x0)"
461        );
462    }
463
464    #[test]
465    fn serialize_e2e_stxw() {
466        let b = hex!("6312000000000000");
467        let i = Instruction::from_bytes(&b).unwrap();
468        assert_eq!(i.to_bytes().unwrap(), &b);
469        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "stxw [r2+0x0], r1");
470        assert_eq!(
471            i.to_asm(AsmFormat::Llvm).unwrap(),
472            "*(u32 *)(r2 + 0x0) = w1"
473        );
474    }
475
476    #[test]
477    fn serialize_e2e_stb() {
478        let b = hex!("7200000000000000");
479        let i = Instruction::from_bytes(&b).unwrap();
480        assert_eq!(i.opcode, Opcode::Stb);
481        assert!(i.src.is_none());
482        assert_eq!(i.to_bytes().unwrap(), &b);
483        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "stb [r0+0x0], 0x0");
484        assert_eq!(
485            i.to_asm(AsmFormat::Llvm).unwrap(),
486            "*(u8 *)(r0 + 0x0) = 0x0"
487        );
488    }
489
490    #[test]
491    fn serialize_e2e_sth() {
492        let b = hex!("6a01040034120000");
493        let i = Instruction::from_bytes(&b).unwrap();
494        assert_eq!(i.opcode, Opcode::Sth);
495        assert!(i.src.is_none());
496        assert_eq!(i.to_bytes().unwrap(), &b);
497        assert_eq!(
498            i.to_asm(AsmFormat::Default).unwrap(),
499            "sth [r1+0x4], 0x1234"
500        );
501        assert_eq!(
502            i.to_asm(AsmFormat::Llvm).unwrap(),
503            "*(u16 *)(r1 + 0x4) = 0x1234"
504        );
505    }
506
507    #[test]
508    fn serialize_e2e_stw() {
509        let b = hex!("6201080064000000");
510        let i = Instruction::from_bytes(&b).unwrap();
511        assert_eq!(i.opcode, Opcode::Stw);
512        assert!(i.src.is_none());
513        assert_eq!(i.to_bytes().unwrap(), &b);
514        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "stw [r1+0x8], 0x64");
515        assert_eq!(
516            i.to_asm(AsmFormat::Llvm).unwrap(),
517            "*(u32 *)(r1 + 0x8) = 0x64"
518        );
519    }
520
521    #[test]
522    fn serialize_e2e_stdw() {
523        let b = hex!("7a021000efbeadde");
524        let i = Instruction::from_bytes(&b).unwrap();
525        assert_eq!(i.opcode, Opcode::Stdw);
526        assert!(i.src.is_none());
527        assert_eq!(i.to_bytes().unwrap(), &b);
528        assert_eq!(
529            i.to_asm(AsmFormat::Default).unwrap(),
530            "stdw [r2+0x10], -0x21524111"
531        );
532        assert_eq!(
533            i.to_asm(AsmFormat::Llvm).unwrap(),
534            "*(u64 *)(r2 + 0x10) = -0x21524111"
535        );
536    }
537
538    #[test]
539    fn serialize_e2e_le16() {
540        let b = hex!("d401000010000000");
541        let i = Instruction::from_bytes(&b).unwrap();
542        assert_eq!(i.opcode, Opcode::Le);
543        assert_eq!(i.to_bytes().unwrap(), &b);
544        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "le16 r1");
545        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r1 = le16 r1");
546    }
547
548    #[test]
549    fn serialize_e2e_le32() {
550        let b = hex!("d401000020000000");
551        let i = Instruction::from_bytes(&b).unwrap();
552        assert_eq!(i.opcode, Opcode::Le);
553        assert_eq!(i.to_bytes().unwrap(), &b);
554        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "le32 r1");
555        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r1 = le32 r1");
556    }
557
558    #[test]
559    fn serialize_e2e_le64() {
560        let b = hex!("d403000040000000");
561        let i = Instruction::from_bytes(&b).unwrap();
562        assert_eq!(i.opcode, Opcode::Le);
563        assert_eq!(i.to_bytes().unwrap(), &b);
564        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "le64 r3");
565        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r3 = le64 r3");
566    }
567
568    #[test]
569    fn serialize_e2e_be16() {
570        let b = hex!("dc01000010000000");
571        let i = Instruction::from_bytes(&b).unwrap();
572        assert_eq!(i.opcode, Opcode::Be);
573        assert_eq!(i.to_bytes().unwrap(), &b);
574        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "be16 r1");
575        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r1 = be16 r1");
576    }
577
578    #[test]
579    fn serialize_e2e_be32() {
580        let b = hex!("dc02000020000000");
581        let i = Instruction::from_bytes(&b).unwrap();
582        assert_eq!(i.opcode, Opcode::Be);
583        assert_eq!(i.to_bytes().unwrap(), &b);
584        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "be32 r2");
585        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r2 = be32 r2");
586    }
587
588    #[test]
589    fn serialize_e2e_be64() {
590        let b = hex!("dc03000040000000");
591        let i = Instruction::from_bytes(&b).unwrap();
592        assert_eq!(i.opcode, Opcode::Be);
593        assert_eq!(i.to_bytes().unwrap(), &b);
594        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "be64 r3");
595        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r3 = be64 r3");
596    }
597
598    #[test]
599    fn serialize_e2e_neg64() {
600        let b = hex!("8700000000000000");
601        let i = Instruction::from_bytes(&b).unwrap();
602        assert_eq!(i.to_bytes().unwrap(), &b);
603        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "neg64 r0");
604        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "r0 = -r0");
605    }
606
607    #[test]
608    fn serialize_e2e_exit() {
609        let b = hex!("9500000000000000");
610        let i = Instruction::from_bytes(&b).unwrap();
611        assert_eq!(i.to_bytes().unwrap(), &b);
612        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "exit");
613        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "exit");
614    }
615
616    #[test]
617    fn serialize_e2e_jset_imm() {
618        let b = hex!("45030a0010000000");
619        let i = Instruction::from_bytes(&b).unwrap();
620        assert_eq!(i.to_bytes().unwrap(), &b);
621        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "jset r3, 0x10, +0xa");
622        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "if r3 & 0x10 goto +0xa");
623    }
624
625    #[test]
626    fn serialize_e2e_sub32_imm() {
627        let b = hex!("1401000042000000");
628        let i = Instruction::from_bytes(&b).unwrap();
629        assert_eq!(i.to_bytes().unwrap(), &b);
630        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "sub32 r1, 0x42");
631        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "w1 -= 0x42");
632    }
633
634    #[test]
635    fn serialize_e2e_mov32_imm() {
636        let b = hex!("b400000001000000");
637        let i = Instruction::from_bytes(&b).unwrap();
638        assert_eq!(i.to_bytes().unwrap(), &b);
639        assert_eq!(i.to_asm(AsmFormat::Default).unwrap(), "mov32 r0, 0x1");
640        assert_eq!(i.to_asm(AsmFormat::Llvm).unwrap(), "w0 = 0x1");
641    }
642
643    #[test]
644    fn test_instruction_size() {
645        let exit = Instruction::from_bytes(&hex!("9500000000000000")).unwrap();
646        assert_eq!(exit.get_size(), 8);
647
648        let lddw = Instruction::from_bytes(&hex!("18010000000000000000000000000000")).unwrap();
649        assert_eq!(lddw.get_size(), 16);
650    }
651
652    #[test]
653    fn test_is_jump() {
654        let ja = Instruction::from_bytes(&hex!("0500000000000000")).unwrap();
655        assert!(ja.is_jump());
656
657        let jeq_imm = Instruction::from_bytes(&hex!("1502000000000000")).unwrap();
658        assert!(jeq_imm.is_jump());
659
660        let jeq_reg = Instruction::from_bytes(&hex!("1d12000000000000")).unwrap();
661        assert!(jeq_reg.is_jump());
662
663        let exit = Instruction::from_bytes(&hex!("9500000000000000")).unwrap();
664        assert!(!exit.is_jump());
665
666        let add64 = Instruction::from_bytes(&hex!("0701000000000000")).unwrap();
667        assert!(!add64.is_jump());
668    }
669
670    #[test]
671    fn test_invalid_opcode() {
672        let result = Instruction::from_bytes(&hex!("ff00000000000000"));
673        assert!(result.is_err());
674    }
675
676    #[test]
677    fn test_unsupported_opcode() {
678        let add32 = Instruction::from_bytes(&hex!("1300000000000000"));
679        assert!(add32.is_err());
680    }
681
682    #[test]
683    fn test_op_imm_bits_16() {
684        let inst = Instruction {
685            opcode: Opcode::Le,
686            dst: Some(Register { n: 1 }),
687            src: None,
688            off: None,
689            imm: Some(Either::Right(Number::Int(16))),
690            span: 0..8,
691        };
692        assert_eq!(inst.op_imm_bits().unwrap(), "le16");
693    }
694
695    #[test]
696    fn test_op_imm_bits_32() {
697        let inst = Instruction {
698            opcode: Opcode::Le,
699            dst: Some(Register { n: 1 }),
700            src: None,
701            off: None,
702            imm: Some(Either::Right(Number::Int(32))),
703            span: 0..8,
704        };
705        assert_eq!(inst.op_imm_bits().unwrap(), "le32");
706    }
707
708    #[test]
709    fn test_op_imm_bits_64() {
710        let inst = Instruction {
711            opcode: Opcode::Be,
712            dst: Some(Register { n: 1 }),
713            src: None,
714            off: None,
715            imm: Some(Either::Right(Number::Int(64))),
716            span: 0..8,
717        };
718        assert_eq!(inst.op_imm_bits().unwrap(), "be64");
719    }
720
721    #[test]
722    fn test_op_imm_bits_invalid() {
723        let inst = Instruction {
724            opcode: Opcode::Le,
725            dst: Some(Register { n: 1 }),
726            src: None,
727            off: None,
728            imm: Some(Either::Right(Number::Int(8))),
729            span: 0..8,
730        };
731        assert!(inst.op_imm_bits().is_err());
732    }
733
734    #[test]
735    fn test_op_imm_bits_no_imm() {
736        let inst = Instruction {
737            opcode: Opcode::Le,
738            dst: Some(Register { n: 1 }),
739            src: None,
740            off: None,
741            imm: None,
742            span: 0..8,
743        };
744        assert!(inst.op_imm_bits().is_err());
745    }
746
747    #[test]
748    fn test_to_bytes_callx() {
749        // callx r5 - dst register encoded in imm
750        let inst = Instruction {
751            opcode: Opcode::Callx,
752            dst: Some(Register { n: 5 }),
753            src: None,
754            off: None,
755            imm: None,
756            span: 0..8,
757        };
758        let bytes = inst.to_bytes().unwrap();
759        assert_eq!(bytes[0], 0x8d);
760        assert_eq!(bytes[4], 5);
761    }
762
763    #[test]
764    #[should_panic(expected = "should have been resolved earlier")]
765    fn test_to_bytes_call_with_identifier() {
766        let inst = Instruction {
767            opcode: Opcode::Call,
768            dst: None,
769            src: None,
770            off: None,
771            imm: Some(Either::Left("function".to_string())),
772            span: 0..8,
773        };
774        // This should panic because "function" does not exist
775        let _ = inst.to_bytes().unwrap();
776    }
777
778    #[test]
779    fn test_to_asm_with_imm_addr() {
780        // Test Number::Addr variant in to_bytes
781        let inst = Instruction {
782            opcode: Opcode::Add64Imm,
783            dst: Some(Register { n: 1 }),
784            src: None,
785            off: None,
786            imm: Some(Either::Right(Number::Addr(100))),
787            span: 0..8,
788        };
789        let bytes = inst.to_bytes().unwrap();
790        assert_eq!(bytes[0], 0x07); // add64 imm opcode
791        assert_eq!(
792            i32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]),
793            100
794        );
795    }
796
/// Decodes raw instruction bytes with `Instruction::from_bytes_sbpf_v2` and
/// checks the resulting opcode for every v2 mapping in the table below, then
/// separately for `callx` and for the 16-byte `lddw`.
#[test]
fn test_from_bytes_sbpf_v2() {
    // (encoded bytes, opcode expected after decoding, label shown on failure)
    let cases = [
        // Opcodes that are new in v2.
        (hex!("8c12000000000000"), Opcode::Ldxw, "v2: 0x8C -> ldxw"),
        (hex!("8f12000000000000"), Opcode::Stxw, "v2: 0x8F -> stxw"),
        // Opcodes that v2 repurposes.
        (
            hex!("2c12000000000000"),
            Opcode::Ldxb,
            "v2: 0x2C (mul32 reg) -> ldxb",
        ),
        (
            hex!("3c12000000000000"),
            Opcode::Ldxh,
            "v2: 0x3C (div32 reg) -> ldxh",
        ),
        (
            hex!("9c12000000000000"),
            Opcode::Ldxdw,
            "v2: 0x9C (mod32 reg) -> ldxdw",
        ),
        (
            hex!("2701040064000000"),
            Opcode::Stb,
            "v2: 0x27 (mul64 imm) -> stb",
        ),
        (
            hex!("2f12040000000000"),
            Opcode::Stxb,
            "v2: 0x2F (mul64 reg) -> stxb",
        ),
        (
            hex!("3701040064000000"),
            Opcode::Sth,
            "v2: 0x37 (div64 imm) -> sth",
        ),
        (
            hex!("3f12040000000000"),
            Opcode::Stxh,
            "v2: 0x3F (div64 reg) -> stxh",
        ),
        (
            hex!("8701040064000000"),
            Opcode::Stw,
            "v2: 0x87 (neg64) -> stw",
        ),
        (
            hex!("9701040064000000"),
            Opcode::Stdw,
            "v2: 0x97 (mod64 imm) -> stdw",
        ),
        (
            hex!("9f12040000000000"),
            Opcode::Stxdw,
            "v2: 0x9F (mod64 reg) -> stxdw",
        ),
    ];

    for (raw, want, label) in cases {
        let decoded = Instruction::from_bytes_sbpf_v2(&raw).unwrap();
        assert_eq!(decoded.opcode, want, "{label}");
    }

    // callx: the decoded destination register must be r5.
    let callx = Instruction::from_bytes_sbpf_v2(&hex!("8d50000000000000")).unwrap();
    assert_eq!(callx.opcode, Opcode::Callx);
    let callx_dst = callx.dst.unwrap();
    assert_eq!(callx_dst.n, 5);

    // lddw: the encoding is 16 bytes long, so two 8-byte halves are supplied.
    let lddw_raw: Vec<u8> = vec![
        0x21, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // first half
        0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // second half
    ];
    let lddw = Instruction::from_bytes_sbpf_v2(&lddw_raw).unwrap();
    assert_eq!(lddw.opcode, Opcode::Lddw);
}
874
/// Checks `Instruction::is_syscall` for `call` instructions whose immediate
/// is still a label: the names in the first group must be recognized as
/// syscalls, and the labels in the second group must not be.
#[test]
fn test_is_syscall() {
    // (call label, whether `is_syscall` should report it as a syscall)
    let test_cases = [
        // Syscalls
        ("sol_log_", true),
        ("sol_invoke_signed_c", true),
        ("abort", true),
        ("sol_sha256", true),
        ("sol_memcpy_", true),
        // Non-syscalls
        ("my_fn", false),
        ("helper_function", false),
        ("entrypoint", false),
        ("random", false),
    ];

    for (name, expected) in test_cases {
        let inst = Instruction {
            opcode: Opcode::Call,
            dst: None,
            src: Some(Register { n: 1 }),
            off: None,
            imm: Some(Either::Left(name.to_string())),
            span: 0..8,
        };
        // Include the label in the failure output so a failing row of the
        // table can be identified without a debugger.
        assert_eq!(
            inst.is_syscall(),
            expected,
            "is_syscall mismatch for `{name}`"
        );
    }
}
903
/// Encodes a `call` with `src` set to r1 and an immediate of -1, then checks
/// the first two encoded bytes and the immediate field.
#[test]
fn test_to_bytes_syscall_dynamic() {
    let dynamic_call = Instruction {
        opcode: Opcode::Call,
        dst: None,
        src: Some(Register { n: 1 }),
        off: None,
        imm: Some(Either::Right(Number::Int(-1))),
        span: 0..8,
    };

    let encoded = dynamic_call.to_bytes().unwrap();
    let (first_byte, second_byte) = (encoded[0], encoded[1]);
    assert_eq!(first_byte, 0x85);
    assert_eq!(second_byte, 0x10);

    // An immediate of -1 occupies bytes 4..8 as FF FF FF FF.
    let imm_field = [encoded[4], encoded[5], encoded[6], encoded[7]];
    assert_eq!(i32::from_le_bytes(imm_field), -1);
}
921
/// Encodes a `call` with `src` set to r0 whose immediate is the `murmur3_32`
/// hash of "sol_log_", then checks the first two encoded bytes and that the
/// immediate field round-trips to the same hash.
#[test]
fn test_to_bytes_syscall_static() {
    let expected_hash = murmur3_32("sol_log_");
    let static_call = Instruction {
        opcode: Opcode::Call,
        dst: None,
        src: Some(Register { n: 0 }),
        off: None,
        imm: Some(Either::Right(Number::Int(i64::from(expected_hash)))),
        span: 0..8,
    };

    let encoded = static_call.to_bytes().unwrap();
    let (first_byte, second_byte) = (encoded[0], encoded[1]);
    assert_eq!(first_byte, 0x85);
    assert_eq!(second_byte, 0x00);

    // Bytes 4..8 must hold the murmur3_32 hash in little-endian order.
    let imm_field = [encoded[4], encoded[5], encoded[6], encoded[7]];
    assert_eq!(u32::from_le_bytes(imm_field), expected_hash);
}
941}