Skip to main content

sbpf_disassembler/
program.rs

1use {
2    crate::{
3        elf_header::{E_MACHINE, E_MACHINE_SBPF, ELFHeader},
4        errors::DisassemblerError,
5        program_header::ProgramHeader,
6        relocation::Relocation,
7        rodata::RodataSection,
8        section_header::SectionHeader,
9        section_header_entry::SectionHeaderEntry,
10    },
11    either::Either,
12    object::{Endianness, read::elf::ElfFile64},
13    sbpf_common::{inst_param::Number, instruction::Instruction, opcode::Opcode},
14    serde::{Deserialize, Serialize},
15    std::collections::{BTreeSet, HashMap},
16};
17
18#[derive(Debug, Serialize, Deserialize)]
19pub struct Program {
20    pub elf_header: ELFHeader,
21    pub program_headers: Vec<ProgramHeader>,
22    pub section_headers: Vec<SectionHeader>,
23    pub section_header_entries: Vec<SectionHeaderEntry>,
24    pub relocations: Vec<Relocation>,
25}
26
27impl Program {
28    pub fn from_bytes(b: &[u8]) -> Result<Self, DisassemblerError> {
29        let elf_file = ElfFile64::<Endianness>::parse(b).map_err(|e| {
30            eprintln!("ELF parse error: {}", e);
31            DisassemblerError::NonStandardElfHeader
32        })?;
33
34        // Parse elf header.
35        let elf_header = ELFHeader::from_elf_file(&elf_file)?;
36
37        // Parse program headers.
38        let program_headers = ProgramHeader::from_elf_file(&elf_file)?;
39
40        // Parse section headers and section header entries.
41        let (section_headers, section_header_entries) = SectionHeader::from_elf_file(&elf_file)?;
42
43        // Parse relocations.
44        let relocations = Relocation::from_elf_file(&elf_file)?;
45
46        Ok(Self {
47            elf_header,
48            program_headers,
49            section_headers,
50            section_header_entries,
51            relocations,
52        })
53    }
54
55    pub fn to_ixs(self) -> Result<(Vec<Instruction>, Option<RodataSection>), DisassemblerError> {
56        // Find and populate instructions for the .text section
57        let text_section = self
58            .section_header_entries
59            .iter()
60            .find(|e| e.label.eq(".text\0"))
61            .ok_or(DisassemblerError::MissingTextSection)?;
62        let text_section_offset = text_section.offset as u64;
63
64        // Build syscall map
65        let syscall_map = self.build_syscall_map(text_section_offset);
66
67        let data = &text_section.data;
68        if !data.len().is_multiple_of(8) {
69            return Err(DisassemblerError::InvalidDataLength);
70        }
71
72        let is_sbpf_v2 =
73            self.elf_header.e_flags == 0x02 && self.elf_header.e_machine == E_MACHINE_SBPF;
74
75        // Get rodata info
76        let rodata_info = self.get_rodata_info();
77        let (rodata_base, rodata_end) = rodata_info
78            .as_ref()
79            .map(|(d, addr)| (*addr, *addr + d.len() as u64))
80            .unwrap_or((0, 0));
81
82        // Parse instructions and build slot mappings
83        let mut ixs: Vec<Instruction> = Vec::new();
84        let mut idx_to_slot: Vec<usize> = Vec::new();
85        let mut pos: usize = 0;
86        let mut slot: usize = 0;
87
88        while pos < data.len() {
89            let remaining = &data[pos..];
90            if remaining.len() < 8 {
91                break;
92            }
93
94            // ugly v2 shit we need to fix goes here:
95            let mut ix = if is_sbpf_v2 {
96                Instruction::from_bytes_sbpf_v2(remaining)?
97            } else {
98                Instruction::from_bytes(remaining)?
99            };
100
101            // Handle syscall relocation
102            if ix.opcode == Opcode::Call
103                && let Some(Either::Right(Number::Int(-1))) = ix.imm
104                && let Some(syscall_name) = syscall_map.get(&(pos as u64))
105            {
106                ix.imm = Some(Either::Left(syscall_name.clone()));
107            }
108
109            idx_to_slot.push(slot);
110
111            if ix.opcode == Opcode::Lddw {
112                pos += 16;
113                slot += 2;
114            } else {
115                pos += 8;
116                slot += 1;
117            }
118
119            ixs.push(ix);
120        }
121
122        let mut slot_to_idx = vec![0usize; slot];
123        for (idx, &slot) in idx_to_slot.iter().enumerate() {
124            slot_to_idx[slot] = idx;
125        }
126
127        // Resolve jump/call labels and collect rodata references
128        let mut rodata_refs = BTreeSet::new();
129
130        for (idx, ix) in ixs.iter_mut().enumerate() {
131            let is_lddw = ix.opcode == Opcode::Lddw;
132
133            // Resolve jump targets
134            if ix.is_jump()
135                && let Some(Either::Right(off)) = &ix.off
136            {
137                let current_slot = idx_to_slot[idx] as i64;
138                let target_slot = current_slot + 1 + (*off as i64);
139                if target_slot >= 0
140                    && let Some(&target_idx) = slot_to_idx.get(target_slot as usize)
141                {
142                    let new_off = target_idx as i64 - (idx as i64 + 1);
143                    ix.off = Some(Either::Right(new_off as i16));
144                }
145            }
146
147            // Resolve internal call targets
148            if ix.opcode == Opcode::Call
149                && let Some(Either::Right(Number::Int(imm))) = &ix.imm
150            {
151                let current_slot = idx_to_slot[idx] as i64;
152                let target_slot = current_slot + 1 + *imm;
153                if target_slot >= 0
154                    && let Some(&target_idx) = slot_to_idx.get(target_slot as usize)
155                {
156                    let new_rel = target_idx as i64 - (idx as i64 + 1);
157                    ix.imm = Some(Either::Right(Number::Int(new_rel)));
158                }
159            }
160
161            // Collect rodata references
162            if is_lddw
163                && rodata_info.is_some()
164                && let Some(Either::Right(Number::Int(imm))) = &ix.imm
165            {
166                let addr = *imm as u64;
167                if addr >= rodata_base && addr < rodata_end {
168                    rodata_refs.insert(addr);
169                }
170            }
171        }
172
173        // Parse rodata section
174        let rodata = if let Some((data, base_addr)) = rodata_info {
175            Some(RodataSection::parse(data, base_addr, &rodata_refs))
176        } else {
177            None
178        };
179
180        Ok((ixs, rodata))
181    }
182
183    /// Build a hashmap where:
184    /// - key: relative position within .text section
185    /// - value: syscall name (sol_log_64_, sol_log_, etc.)
186    fn build_syscall_map(&self, text_section_offset: u64) -> HashMap<u64, String> {
187        self.relocations
188            .iter()
189            .filter(|r| r.is_syscall())
190            .filter_map(|r| {
191                r.symbol_name.as_ref().map(|name| {
192                    // Convert absolute offset to relative position within .text
193                    let relative_pos = r.relative_offset(text_section_offset);
194                    (relative_pos, name.clone())
195                })
196            })
197            .collect()
198    }
199
200    /// Get the raw rodata bytes and the virtual address where it's loaded in memory
201    fn get_rodata_info(&self) -> Option<(Vec<u8>, u64)> {
202        let rodata_entry = self
203            .section_header_entries
204            .iter()
205            .find(|e| e.label.starts_with(".rodata"))?;
206
207        // v3: use program header p_vaddr
208        // v0: use section header sh_addr
209        let vaddr = if self.is_v3() {
210            self.program_headers
211                .iter()
212                .find(|ph| {
213                    let rodata_offset = rodata_entry.offset as u64;
214                    rodata_offset >= ph.p_offset && rodata_offset < ph.p_offset + ph.p_filesz
215                })
216                .map(|ph| ph.p_vaddr)
217                .unwrap_or(0)
218        } else {
219            let rodata_header = self
220                .section_headers
221                .iter()
222                .find(|h| h.sh_offset as usize == rodata_entry.offset)?;
223            rodata_header.sh_addr
224        };
225
226        Some((rodata_entry.data.clone(), vaddr))
227    }
228
229    /// Get the entrypoint offset
230    pub fn get_entrypoint_offset(&self) -> Option<u64> {
231        let e_entry = self.elf_header.e_entry;
232
233        if self.is_v3() {
234            const V3_BYTECODE_VADDR: u64 = 1 << 32;
235            if e_entry >= V3_BYTECODE_VADDR {
236                Some(e_entry - V3_BYTECODE_VADDR)
237            } else {
238                None
239            }
240        } else {
241            let text_header = self.section_headers.iter().find(|h| {
242                self.section_header_entries
243                    .iter()
244                    .any(|e| e.label.eq(".text\0") && e.offset == h.sh_offset as usize)
245            })?;
246            let text_sh_addr = text_header.sh_addr;
247
248            if e_entry >= text_sh_addr {
249                Some(e_entry - text_sh_addr)
250            } else {
251                None
252            }
253        }
254    }
255
256    fn is_v3(&self) -> bool {
257        self.elf_header.e_flags == 0x03 && self.elf_header.e_machine == E_MACHINE
258    }
259}
260
#[cfg(test)]
mod tests {
    use {
        crate::{
            elf_header::{E_MACHINE_SBPF, ELFHeader},
            errors::DisassemblerError,
            program::Program,
            section_header_entry::SectionHeaderEntry,
        },
        hex_literal::hex,
        sbpf_common::opcode::Opcode,
    };

    /// ELF header with valid identification bytes and every other field zeroed.
    /// Tests override `e_machine` / `e_flags` with struct-update syntax.
    fn blank_elf_header() -> ELFHeader {
        ELFHeader {
            ei_magic: [127, 69, 76, 70],
            ei_class: 2,
            ei_data: 1,
            ei_version: 1,
            ei_osabi: 0,
            ei_abiversion: 0,
            ei_pad: [0; 7],
            e_type: 0,
            e_machine: 0,
            e_version: 0,
            e_entry: 0,
            e_phoff: 0,
            e_shoff: 0,
            e_flags: 0,
            e_ehsize: 0,
            e_phentsize: 0,
            e_phnum: 0,
            e_shentsize: 0,
            e_shnum: 0,
            e_shstrndx: 0,
        }
    }

    /// Program whose only section is a `.text` at offset 0 holding `text`.
    fn program_with_text(elf_header: ELFHeader, text: Vec<u8>) -> Program {
        Program {
            elf_header,
            program_headers: vec![],
            section_headers: vec![],
            section_header_entries: vec![
                SectionHeaderEntry::new(".text\0".to_string(), 0, text).unwrap(),
            ],
            relocations: vec![],
        }
    }

    #[test]
    fn try_deserialize_program() {
        // Complete ELF fixture; the test passes as long as parsing succeeds.
        let program = Program::from_bytes(&hex!("7F454C460201010000000000000000000300F700010000002001000000000000400000000000000028020000000000000000000040003800030040000600050001000000050000002001000000000000200100000000000020010000000000003000000000000000300000000000000000100000000000000100000004000000C001000000000000C001000000000000C0010000000000003C000000000000003C000000000000000010000000000000020000000600000050010000000000005001000000000000500100000000000070000000000000007000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007912A000000000007911182900000000B7000000010000002D21010000000000B70000000000000095000000000000001E0000000000000004000000000000000600000000000000C0010000000000000B0000000000000018000000000000000500000000000000F0010000000000000A000000000000000C00000000000000160000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120001002001000000000000300000000000000000656E747279706F696E7400002E74657874002E64796E737472002E64796E73796D002E64796E616D6963002E73687374727461620000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000010000000600000000000000200100000000000020010000000000003000000000000000000000000000000008000000000000000000000000000000170000000600000003000000000000005001000000000000500100000000000070000000000000000400000000000000080000000000000010000000000000000F0000000B0000000200000000000000C001000000000000C001000000000000300000000000000004000000010000000800000000000000180000000000000007000000030000000200000000000000F001000000000000F0010000000000000C00000000000000000000000000000001000000000000000000000000000000200000000300000000000000000000000000000000000000FC010000000000002A00000000000000000000000000000001000000000000000000000000000000")).unwrap();
        println!("{:?}", program.section_header_entries);
    }

    #[test]
    fn test_to_ixs_invalid_data_length() {
        // A 3-byte `.text` payload is not a multiple of 8 and must be rejected.
        let program = program_with_text(blank_elf_header(), vec![0x95, 0x00, 0x00]);

        assert!(matches!(
            program.to_ixs(),
            Err(DisassemblerError::InvalidDataLength)
        ));
    }

    #[test]
    fn test_to_ixs_with_lddw() {
        // A 16-byte lddw followed by an 8-byte exit.
        let text = vec![
            0x18, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // lddw, first slot
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // lddw, second slot
            0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // exit
        ];
        let elf_header = ELFHeader {
            e_machine: E_MACHINE_SBPF,
            ..blank_elf_header()
        };

        let (ixs, _) = program_with_text(elf_header, text).to_ixs().unwrap();

        assert_eq!(ixs.len(), 2); // lddw + exit
        assert_eq!(ixs[0].opcode, Opcode::Lddw);
    }

    #[test]
    fn test_to_ixs_sbpf_v2() {
        // Use a v2 opcode (0x8C -> ldxw in v2)
        let text = vec![0x8c, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
        let elf_header = ELFHeader {
            e_machine: E_MACHINE_SBPF,
            e_flags: 0x02, // SBPF v2 flag
            ..blank_elf_header()
        };

        let (ixs, _) = program_with_text(elf_header, text).to_ixs().unwrap();

        assert_eq!(ixs.len(), 1);
        assert_eq!(ixs[0].opcode, Opcode::Ldxw);
    }
}