sbpf_disassembler/
program.rs

1use {
2    crate::{
3        elf_header::{E_MACHINE_SBPF, ELFHeader},
4        errors::DisassemblerError,
5        program_header::ProgramHeader,
6        relocation::Relocation,
7        rodata::RodataSection,
8        section_header::SectionHeader,
9        section_header_entry::SectionHeaderEntry,
10    },
11    either::Either,
12    object::{Endianness, read::elf::ElfFile64},
13    sbpf_common::{inst_param::Number, instruction::Instruction, opcode::Opcode},
14    serde::{Deserialize, Serialize},
15    std::collections::{BTreeSet, HashMap},
16};
17
18#[derive(Debug, Serialize, Deserialize)]
19pub struct Program {
20    pub elf_header: ELFHeader,
21    pub program_headers: Vec<ProgramHeader>,
22    pub section_headers: Vec<SectionHeader>,
23    pub section_header_entries: Vec<SectionHeaderEntry>,
24    pub relocations: Vec<Relocation>,
25}
26
27impl Program {
28    pub fn from_bytes(b: &[u8]) -> Result<Self, DisassemblerError> {
29        let elf_file = ElfFile64::<Endianness>::parse(b).map_err(|e| {
30            eprintln!("ELF parse error: {}", e);
31            DisassemblerError::NonStandardElfHeader
32        })?;
33
34        // Parse elf header.
35        let elf_header = ELFHeader::from_elf_file(&elf_file)?;
36
37        // Parse program headers.
38        let program_headers = ProgramHeader::from_elf_file(&elf_file)?;
39
40        // Parse section headers and section header entries.
41        let (section_headers, section_header_entries) = SectionHeader::from_elf_file(&elf_file)?;
42
43        // Parse relocations.
44        let relocations = Relocation::from_elf_file(&elf_file)?;
45
46        Ok(Self {
47            elf_header,
48            program_headers,
49            section_headers,
50            section_header_entries,
51            relocations,
52        })
53    }
54
55    pub fn to_ixs(self) -> Result<(Vec<Instruction>, Option<RodataSection>), DisassemblerError> {
56        // Find and populate instructions for the .text section
57        let text_section = self
58            .section_header_entries
59            .iter()
60            .find(|e| e.label.eq(".text\0"))
61            .ok_or(DisassemblerError::MissingTextSection)?;
62        let text_section_offset = text_section.offset as u64;
63
64        // Build syscall map
65        let syscall_map = self.build_syscall_map(text_section_offset);
66
67        let data = &text_section.data;
68        if !data.len().is_multiple_of(8) {
69            return Err(DisassemblerError::InvalidDataLength);
70        }
71
72        let is_sbpf_v2 =
73            self.elf_header.e_flags == 0x02 && self.elf_header.e_machine == E_MACHINE_SBPF;
74
75        // Get rodata info
76        let rodata_info = self.get_rodata_info();
77        let (rodata_base, rodata_end) = rodata_info
78            .as_ref()
79            .map(|(d, addr)| (*addr, *addr + d.len() as u64))
80            .unwrap_or((0, 0));
81
82        // Parse instructions and build slot/position mappings
83        let mut ixs: Vec<Instruction> = Vec::new();
84        let mut slot_to_position: Vec<u64> = Vec::new();
85        let mut idx_to_slot: Vec<usize> = Vec::new();
86        let mut pos: usize = 0;
87        let mut slot: usize = 0;
88
89        while pos < data.len() {
90            let remaining = &data[pos..];
91            if remaining.len() < 8 {
92                break;
93            }
94
95            // ugly v2 shit we need to fix goes here:
96            let mut ix = if is_sbpf_v2 {
97                Instruction::from_bytes_sbpf_v2(remaining)?
98            } else {
99                Instruction::from_bytes(remaining)?
100            };
101
102            // Handle syscall relocation
103            if ix.opcode == Opcode::Call
104                && let Some(Either::Right(Number::Int(-1))) = ix.imm
105                && let Some(syscall_name) = syscall_map.get(&(pos as u64))
106            {
107                ix.imm = Some(Either::Left(syscall_name.clone()));
108            }
109
110            slot_to_position.push(pos as u64);
111            idx_to_slot.push(slot);
112
113            if ix.opcode == Opcode::Lddw {
114                slot_to_position.push(pos as u64 + 8);
115                pos += 16;
116                slot += 2;
117            } else {
118                pos += 8;
119                slot += 1;
120            }
121
122            ixs.push(ix);
123        }
124
125        // Resolve jump/call labels and collect rodata references
126        let mut rodata_refs = BTreeSet::new();
127
128        for (idx, ix) in ixs.iter_mut().enumerate() {
129            let is_lddw = ix.opcode == Opcode::Lddw;
130
131            // Resolve jump targets
132            if ix.is_jump()
133                && let Some(Either::Right(off)) = &ix.off
134            {
135                let current_slot = idx_to_slot[idx];
136                let target_slot = (current_slot as i64 + 1 + (*off as i64)) as usize;
137                if let Some(&target_pos) = slot_to_position.get(target_slot) {
138                    ix.off = Some(Either::Left(format!("jmp_{:04x}", target_pos)));
139                }
140            }
141
142            // Resolve internal call targets
143            if ix.opcode == Opcode::Call
144                && let Some(Either::Right(Number::Int(imm))) = &ix.imm
145            {
146                let current_slot = idx_to_slot[idx] as i64;
147                let target_slot = current_slot + 1 + *imm;
148                if target_slot >= 0
149                    && let Some(&target_pos) = slot_to_position.get(target_slot as usize)
150                {
151                    ix.imm = Some(Either::Left(format!("fn_{:04x}", target_pos)));
152                }
153            }
154
155            // Collect rodata references
156            if is_lddw
157                && rodata_info.is_some()
158                && let Some(Either::Right(Number::Int(imm))) = &ix.imm
159            {
160                let addr = *imm as u64;
161                if addr >= rodata_base && addr < rodata_end {
162                    rodata_refs.insert(addr);
163                }
164            }
165        }
166
167        // Parse rodata and replace addresses with labels
168        let rodata = if let Some((data, base_addr)) = rodata_info {
169            let rodata = RodataSection::parse(data, base_addr, &rodata_refs);
170
171            for ix in &mut ixs {
172                if ix.opcode == Opcode::Lddw
173                    && let Some(Either::Right(Number::Int(imm))) = &ix.imm
174                {
175                    let addr = *imm as u64;
176                    if let Some(label) = rodata.get_label(addr) {
177                        ix.imm = Some(Either::Left(label.to_string()));
178                    }
179                }
180            }
181
182            Some(rodata)
183        } else {
184            None
185        };
186
187        Ok((ixs, rodata))
188    }
189
190    /// Build a hashmap where:
191    /// - key: relative position within .text section
192    /// - value: syscall name (sol_log_64_, sol_log_, etc.)
193    fn build_syscall_map(&self, text_section_offset: u64) -> HashMap<u64, String> {
194        self.relocations
195            .iter()
196            .filter(|r| r.is_syscall())
197            .filter_map(|r| {
198                r.symbol_name.as_ref().map(|name| {
199                    // Convert absolute offset to relative position within .text
200                    let relative_pos = r.relative_offset(text_section_offset);
201                    (relative_pos, name.clone())
202                })
203            })
204            .collect()
205    }
206
207    /// Get the raw rodata bytes and the virtual address where it's loaded in memory
208    fn get_rodata_info(&self) -> Option<(Vec<u8>, u64)> {
209        let rodata_entry = self
210            .section_header_entries
211            .iter()
212            .find(|e| e.label.starts_with(".rodata"))?;
213
214        let rodata_header = self
215            .section_headers
216            .iter()
217            .find(|h| h.sh_offset as usize == rodata_entry.offset)?;
218
219        Some((rodata_entry.data.clone(), rodata_header.sh_addr))
220    }
221
222    /// Get the entrypoint offset
223    pub fn get_entrypoint_offset(&self) -> Option<u64> {
224        let text_header = self.section_headers.iter().find(|h| {
225            self.section_header_entries
226                .iter()
227                .any(|e| e.label.eq(".text\0") && e.offset == h.sh_offset as usize)
228        })?;
229        let text_sh_addr = text_header.sh_addr;
230        let e_entry = self.elf_header.e_entry;
231
232        if e_entry >= text_sh_addr {
233            Some(e_entry - text_sh_addr)
234        } else {
235            None
236        }
237    }
238}
239
#[cfg(test)]
mod tests {
    use {
        crate::{
            elf_header::{E_MACHINE_SBPF, ELFHeader},
            errors::DisassemblerError,
            program::Program,
            section_header_entry::SectionHeaderEntry,
        },
        hex_literal::hex,
        sbpf_common::opcode::Opcode,
    };

    /// ELF header with a valid 64-bit little-endian identification and every other
    /// field zeroed. Tests override individual fields with struct-update syntax.
    fn blank_header() -> ELFHeader {
        ELFHeader {
            ei_magic: [127, 69, 76, 70],
            ei_class: 2,
            ei_data: 1,
            ei_version: 1,
            ei_osabi: 0,
            ei_abiversion: 0,
            ei_pad: [0; 7],
            e_type: 0,
            e_machine: 0,
            e_version: 0,
            e_entry: 0,
            e_phoff: 0,
            e_shoff: 0,
            e_flags: 0,
            e_ehsize: 0,
            e_phentsize: 0,
            e_phnum: 0,
            e_shentsize: 0,
            e_shnum: 0,
            e_shstrndx: 0,
        }
    }

    /// Builds a program whose only content is a `.text` section at offset 0 holding
    /// `text`; there are no headers and no relocations.
    fn program_with_text(elf_header: ELFHeader, text: Vec<u8>) -> Program {
        Program {
            elf_header,
            program_headers: vec![],
            section_headers: vec![],
            section_header_entries: vec![
                SectionHeaderEntry::new(".text\0".to_string(), 0, text).unwrap(),
            ],
            relocations: vec![],
        }
    }

    /// Smoke test: a complete ELF image must parse without error.
    #[test]
    fn try_deserialize_program() {
        let program = Program::from_bytes(&hex!("7F454C460201010000000000000000000300F700010000002001000000000000400000000000000028020000000000000000000040003800030040000600050001000000050000002001000000000000200100000000000020010000000000003000000000000000300000000000000000100000000000000100000004000000C001000000000000C001000000000000C0010000000000003C000000000000003C000000000000000010000000000000020000000600000050010000000000005001000000000000500100000000000070000000000000007000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007912A000000000007911182900000000B7000000010000002D21010000000000B70000000000000095000000000000001E0000000000000004000000000000000600000000000000C0010000000000000B0000000000000018000000000000000500000000000000F0010000000000000A000000000000000C00000000000000160000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120001002001000000000000300000000000000000656E747279706F696E7400002E74657874002E64796E737472002E64796E73796D002E64796E616D6963002E73687374727461620000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000010000000600000000000000200100000000000020010000000000003000000000000000000000000000000008000000000000000000000000000000170000000600000003000000000000005001000000000000500100000000000070000000000000000400000000000000080000000000000010000000000000000F0000000B0000000200000000000000C001000000000000C001000000000000300000000000000004000000010000000800000000000000180000000000000007000000030000000200000000000000F001000000000000F0010000000000000C00000000000000000000000000000001000000000000000000000000000000200000000300000000000000000000000000000000000000FC010000000000002A00000000000000000000000000000001000000000000000000000000000000")).unwrap();
        println!("{:?}", program.section_header_entries);
    }

    /// A `.text` section whose length is not a multiple of 8 must be rejected.
    #[test]
    fn test_to_ixs_invalid_data_length() {
        // Only 3 bytes: not a whole instruction slot.
        let program = program_with_text(blank_header(), vec![0x95, 0x00, 0x00]);

        assert!(matches!(
            program.to_ixs(),
            Err(DisassemblerError::InvalidDataLength)
        ));
    }

    /// A 16-byte `lddw` followed by `exit` decodes to exactly two instructions.
    #[test]
    fn test_to_ixs_with_lddw() {
        let mut lddw_bytes = vec![0x18, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
        lddw_bytes.extend_from_slice(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
        lddw_bytes.extend_from_slice(&[0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); // exit

        let header = ELFHeader {
            e_machine: E_MACHINE_SBPF,
            ..blank_header()
        };
        let program = program_with_text(header, lddw_bytes);

        let (ixs, _) = program.to_ixs().unwrap();
        assert_eq!(ixs.len(), 2); // lddw + exit
        assert_eq!(ixs[0].opcode, Opcode::Lddw);
    }

    /// With the SBPF machine type and `e_flags == 0x02`, the v2 decoder is used.
    #[test]
    fn test_to_ixs_sbpf_v2() {
        // Use a v2 opcode (0x8C -> ldxw in v2)
        let v2_bytes = vec![0x8c, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];

        let header = ELFHeader {
            e_machine: E_MACHINE_SBPF,
            e_flags: 0x02, // SBPF v2 flag
            ..blank_header()
        };
        let program = program_with_text(header, v2_bytes);

        let (ixs, _) = program.to_ixs().unwrap();
        assert_eq!(ixs.len(), 1);
        assert_eq!(ixs[0].opcode, Opcode::Ldxw);
    }
}