sbpf_assembler/
program.rs

1use {
2    crate::{
3        debuginfo::DebugInfo,
4        dynsym::{DynamicSymbol, RelDyn, RelocationType},
5        header::{ElfHeader, ProgramHeader},
6        parser::ParseResult,
7        section::{
8            DynStrSection, DynSymSection, DynamicSection, NullSection, RelDynSection, Section,
9            SectionType, ShStrTabSection,
10        },
11    },
12    std::{collections::HashMap, fs::File, io::Write, path::Path},
13};
14
15#[derive(Debug)]
16pub struct Program {
17    pub elf_header: ElfHeader,
18    pub program_headers: Option<Vec<ProgramHeader>>,
19    pub sections: Vec<SectionType>,
20}
21
22impl Program {
23    pub fn from_parse_result(
24        ParseResult {
25            code_section,
26            data_section,
27            dynamic_symbols,
28            relocation_data,
29            prog_is_static,
30        }: ParseResult,
31    ) -> Self {
32        let mut elf_header = ElfHeader::new();
33        let mut program_headers = None;
34
35        // omit program headers if static
36        let ph_count = if prog_is_static { 0 } else { 3 };
37        elf_header.e_phnum = ph_count;
38
39        // save read + execute size for program header before
40        // ownership of code/data sections is transferred
41        let text_size = code_section.size() + data_section.size();
42
43        // Calculate base offset after ELF header and program headers
44        let mut current_offset = 64 + (ph_count as u64 * 56); // 64 bytes ELF header, 56 bytes per program header
45
46        let text_offset = current_offset;
47
48        // Get the entry point offset from dynamic_symbols if available
49        let entry_point_offset = dynamic_symbols
50            .get_entry_points()
51            .first()
52            .map(|(_, offset)| *offset)
53            .unwrap_or(0);
54
55        elf_header.e_entry = text_offset + entry_point_offset;
56
57        // Create a vector of sections
58        let mut sections = Vec::new();
59        sections.push(SectionType::Default(NullSection::new()));
60
61        let mut section_names = Vec::new();
62
63        // Code section
64        let mut text_section = SectionType::Code(code_section);
65        text_section.set_offset(current_offset);
66        current_offset += text_section.size();
67        section_names.push(text_section.name().to_string());
68        sections.push(text_section);
69
70        // Data section
71        if data_section.size() > 0 {
72            let mut rodata_section = SectionType::Data(data_section);
73            rodata_section.set_offset(current_offset);
74            current_offset += rodata_section.size();
75            section_names.push(rodata_section.name().to_string());
76            sections.push(rodata_section);
77        }
78
79        let padding = (8 - (current_offset % 8)) % 8;
80        current_offset += padding;
81
82        if !prog_is_static {
83            let mut symbol_names = Vec::new();
84            let mut dyn_syms = Vec::new();
85            let mut dyn_str_offset = 1;
86
87            dyn_syms.push(DynamicSymbol::new(0, 0, 0, 0, 0, 0));
88
89            // all symbols handled right now are all global symbols
90            for (name, _) in dynamic_symbols.get_entry_points() {
91                symbol_names.push(name.clone());
92                dyn_syms.push(DynamicSymbol::new(
93                    dyn_str_offset as u32,
94                    0x10,
95                    0,
96                    1,
97                    elf_header.e_entry,
98                    0,
99                ));
100                dyn_str_offset += name.len() + 1;
101            }
102
103            for (name, _) in dynamic_symbols.get_call_targets() {
104                symbol_names.push(name.clone());
105                dyn_syms.push(DynamicSymbol::new(dyn_str_offset as u32, 0x10, 0, 0, 0, 0));
106                dyn_str_offset += name.len() + 1;
107            }
108
109            let mut rel_count = 0;
110            let mut rel_dyns = Vec::new();
111            for (offset, rel_type, name) in relocation_data.get_rel_dyns() {
112                if rel_type == RelocationType::RSbfSyscall {
113                    if let Some(index) = symbol_names.iter().position(|n| *n == name) {
114                        rel_dyns.push(RelDyn::new(
115                            offset + text_offset,
116                            rel_type as u64,
117                            index as u64 + 1,
118                        ));
119                    } else {
120                        panic!("Symbol {} not found in symbol_names", name);
121                    }
122                } else if rel_type == RelocationType::RSbf64Relative {
123                    rel_count += 1;
124                    rel_dyns.push(RelDyn::new(offset + text_offset, rel_type as u64, 0));
125                }
126            }
127            // create four dynamic related sections
128            let mut dynamic_section = SectionType::Dynamic(DynamicSection::new(
129                (section_names
130                    .iter()
131                    .map(|name| name.len() + 1)
132                    .sum::<usize>()
133                    + 1) as u32,
134            ));
135            section_names.push(dynamic_section.name().to_string());
136
137            let mut dynsym_section = SectionType::DynSym(DynSymSection::new(
138                (section_names
139                    .iter()
140                    .map(|name| name.len() + 1)
141                    .sum::<usize>()
142                    + 1) as u32,
143                dyn_syms,
144            ));
145            section_names.push(dynsym_section.name().to_string());
146
147            let mut dynstr_section = SectionType::DynStr(DynStrSection::new(
148                (section_names
149                    .iter()
150                    .map(|name| name.len() + 1)
151                    .sum::<usize>()
152                    + 1) as u32,
153                symbol_names,
154            ));
155            section_names.push(dynstr_section.name().to_string());
156
157            let mut rel_dyn_section = SectionType::RelDyn(RelDynSection::new(
158                (section_names
159                    .iter()
160                    .map(|name| name.len() + 1)
161                    .sum::<usize>()
162                    + 1) as u32,
163                rel_dyns,
164            ));
165            section_names.push(rel_dyn_section.name().to_string());
166
167            dynamic_section.set_offset(current_offset);
168            if let SectionType::Dynamic(ref mut dynamic_section) = dynamic_section {
169                // link to .dynstr
170                dynamic_section.set_link(
171                    section_names
172                        .iter()
173                        .position(|name| name == ".dynstr")
174                        .expect("missing .dynstr section") as u32
175                        + 1,
176                );
177                dynamic_section.set_rel_count(rel_count);
178            }
179            current_offset += dynamic_section.size();
180
181            dynsym_section.set_offset(current_offset);
182            if let SectionType::DynSym(ref mut dynsym_section) = dynsym_section {
183                // link to .dynstr
184                dynsym_section.set_link(
185                    section_names
186                        .iter()
187                        .position(|name| name == ".dynstr")
188                        .expect("missing .dynstr section") as u32
189                        + 1,
190                );
191            }
192            current_offset += dynsym_section.size();
193
194            dynstr_section.set_offset(current_offset);
195            current_offset += dynstr_section.size();
196
197            rel_dyn_section.set_offset(current_offset);
198            if let SectionType::RelDyn(ref mut rel_dyn_section) = rel_dyn_section {
199                // link to .dynsym
200                rel_dyn_section.set_link(
201                    section_names
202                        .iter()
203                        .position(|name| name == ".dynsym")
204                        .expect("missing .dynsym section") as u32
205                        + 1,
206                );
207            }
208            current_offset += rel_dyn_section.size();
209
210            if let SectionType::Dynamic(ref mut dynamic_section) = dynamic_section {
211                dynamic_section.set_rel_offset(rel_dyn_section.offset());
212                dynamic_section.set_rel_size(rel_dyn_section.size());
213                dynamic_section.set_dynsym_offset(dynsym_section.offset());
214                dynamic_section.set_dynstr_offset(dynstr_section.offset());
215                dynamic_section.set_dynstr_size(dynstr_section.size());
216            }
217
218            let mut shstrtab_section = SectionType::ShStrTab(ShStrTabSection::new(
219                (section_names
220                    .iter()
221                    .map(|name| name.len() + 1)
222                    .sum::<usize>()
223                    + 1) as u32,
224                section_names,
225            ));
226            shstrtab_section.set_offset(current_offset);
227            current_offset += shstrtab_section.size();
228
229            program_headers = Some(vec![
230                ProgramHeader::new_load(
231                    text_offset,
232                    text_size,
233                    true, // executable
234                ),
235                ProgramHeader::new_load(
236                    dynsym_section.offset(),
237                    dynsym_section.size() + dynstr_section.size() + rel_dyn_section.size(),
238                    false,
239                ),
240                ProgramHeader::new_dynamic(dynamic_section.offset(), dynamic_section.size()),
241            ]);
242
243            sections.push(dynamic_section);
244            sections.push(dynsym_section);
245            sections.push(dynstr_section);
246            sections.push(rel_dyn_section);
247            sections.push(shstrtab_section);
248        } else {
249            // Create a vector of section names
250            let mut section_names = Vec::new();
251            for section in &sections {
252                section_names.push(section.name().to_string());
253            }
254
255            let mut shstrtab_section = ShStrTabSection::new(
256                section_names
257                    .iter()
258                    .map(|name| name.len() + 1)
259                    .sum::<usize>() as u32,
260                section_names,
261            );
262            shstrtab_section.set_offset(current_offset);
263            current_offset += shstrtab_section.size();
264            sections.push(SectionType::ShStrTab(shstrtab_section));
265        }
266
267        // Update section header offset in ELF header
268        let padding = (8 - (current_offset % 8)) % 8;
269        elf_header.e_shoff = current_offset + padding;
270        elf_header.e_shnum = sections.len() as u16;
271        elf_header.e_shstrndx = sections.len() as u16 - 1;
272
273        Self {
274            elf_header,
275            program_headers,
276            sections,
277        }
278    }
279
280    pub fn emit_bytecode(&self) -> Vec<u8> {
281        let mut bytes = Vec::new();
282
283        // Emit ELF Header bytes
284        bytes.extend(self.elf_header.bytecode());
285
286        // Emit program headers
287        if self.program_headers.is_some() {
288            for ph in self.program_headers.as_ref().unwrap() {
289                bytes.extend(ph.bytecode());
290            }
291        }
292
293        // Emit sections
294        for section in &self.sections {
295            bytes.extend(section.bytecode());
296        }
297
298        // Emit section headers
299        for section in &self.sections {
300            bytes.extend(section.section_header_bytecode());
301        }
302
303        bytes
304    }
305
306    pub fn has_rodata(&self) -> bool {
307        self.sections.iter().any(|s| s.name() == ".rodata")
308    }
309
310    pub fn parse_rodata(&self) -> Vec<(String, usize, String)> {
311        let rodata = self
312            .sections
313            .iter()
314            .find(|s| s.name() == ".rodata")
315            .unwrap();
316        if let SectionType::Data(data_section) = rodata {
317            data_section.rodata()
318        } else {
319            panic!("ROData section not found");
320        }
321    }
322
323    pub fn get_debug_map(&self) -> HashMap<u64, DebugInfo> {
324        let code = self.sections.iter().find(|s| s.name() == ".text").unwrap();
325        if let SectionType::Code(code_section) = code {
326            code_section.get_debug_map().clone()
327        } else {
328            panic!("Code section not found");
329        }
330    }
331
332    pub fn save_to_file(&self, input_path: &str) -> std::io::Result<()> {
333        // Get the file stem (name without extension) from input path
334        let path = Path::new(input_path);
335        let file_stem = path
336            .file_stem()
337            .and_then(|s| s.to_str())
338            .unwrap_or("output");
339
340        // Create the output file name with .so extension
341        let output_path = format!("{}.so", file_stem);
342
343        // Get the bytecode
344        let bytes = self.emit_bytecode();
345
346        // Write bytes to file
347        let mut file = File::create(output_path)?;
348        file.write_all(&bytes)?;
349
350        Ok(())
351    }
352}
353
#[cfg(test)]
mod tests {
    use {super::*, crate::parser::parse};

    /// Parses `source` and lays it out as a [`Program`], panicking on parse errors.
    fn assemble(source: &str) -> Program {
        Program::from_parse_result(parse(source).unwrap())
    }

    #[test]
    fn test_program_from_simple_source() {
        let program = assemble("exit");

        // A program always has at least the null section and `.text`.
        assert!(!program.sections.is_empty());
        assert!(program.sections.len() >= 2);
    }

    #[test]
    fn test_program_without_rodata() {
        let program = assemble("exit");

        assert!(!program.has_rodata());
    }

    #[test]
    fn test_program_emit_bytecode() {
        let bytecode = assemble("exit").emit_bytecode();

        assert!(!bytecode.is_empty());
        // The image must begin with the ELF magic number.
        assert_eq!(&bytecode[0..4], b"\x7fELF");
    }

    #[test]
    fn test_program_get_debug_map() {
        let debug_map = assemble("exit").get_debug_map();

        assert!(!debug_map.is_empty());
    }

    #[test]
    fn test_program_static_no_program_headers() {
        // Force the static layout regardless of what the parser decided.
        let mut parse_result = parse("exit").unwrap();
        parse_result.prog_is_static = true;

        let program = Program::from_parse_result(parse_result);
        assert!(program.program_headers.is_none());
        assert_eq!(program.elf_header.e_phnum, 0);
    }

    #[test]
    fn test_program_sections_ordering() {
        let program = assemble("exit");

        // The null section comes first, immediately followed by `.text`.
        assert_eq!(program.sections[0].name(), "");
        assert_eq!(program.sections[1].name(), ".text");
    }
}
423}