Skip to main content

synth_backend/
elf_builder.rs

1//! ELF (Executable and Linkable Format) Builder for ARM
2//!
3//! Generates ELF32 files for ARM Cortex-M targets
4
5use synth_core::Result;
6
/// Word size of the ELF file (the `EI_CLASS` identification byte).
///
/// The discriminant is the byte value the builder writes into the header.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ElfClass {
    /// 32-bit objects
    Elf32 = 1,
    /// 64-bit objects
    Elf64 = 2,
}
15
/// Byte order recorded in the ELF identification bytes.
///
/// The discriminant is the byte value the builder writes into the header.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ElfData {
    /// Least-significant byte first
    LittleEndian = 1,
    /// Most-significant byte first
    BigEndian = 2,
}
24
/// Kind of object file, written to the header as a 16-bit value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ElfType {
    /// Relocatable object (input to a linker)
    Rel = 1,
    /// Executable image
    Exec = 2,
    /// Shared object
    Dyn = 3,
}
35
/// Target architecture, written to the header as a 16-bit machine code.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ElfMachine {
    /// 32-bit ARM
    Arm = 40,
    /// 64-bit ARM (AArch64)
    AArch64 = 183,
}
44
/// Section header type; the discriminant is the value stored in `sh_type`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SectionType {
    /// Inactive / placeholder section header
    Null = 0,
    /// Program-defined contents (code or initialized data)
    ProgBits = 1,
    /// Symbol table
    SymTab = 2,
    /// String table
    StrTab = 3,
    /// Relocation entries that carry explicit addends
    Rela = 4,
    /// Symbol hash table
    Hash = 5,
    /// Dynamic linking information
    Dynamic = 6,
    /// Note section
    Note = 7,
    /// Occupies no file space (e.g. `.bss`)
    NoBits = 8,
    /// Relocation entries without explicit addends
    Rel = 9,
}
69
/// Thin wrapper around a raw section-flags bit set.
///
/// The associated constants are plain `u32` masks meant to be OR-ed together
/// and handed to `Section::with_flags`.
#[derive(Clone, Copy, Debug)]
pub struct SectionFlags(pub u32);

impl SectionFlags {
    /// Section is writable at run time
    pub const WRITE: u32 = 1 << 0;
    /// Section occupies memory during execution
    pub const ALLOC: u32 = 1 << 1;
    /// Section holds executable instructions
    pub const EXEC: u32 = 1 << 2;
    /// Section contents may be merged
    pub const MERGE: u32 = 1 << 4;
    /// Section consists of null-terminated strings
    pub const STRINGS: u32 = 1 << 5;
}
86
87/// ELF section
88#[derive(Debug, Clone)]
89pub struct Section {
90    /// Section name (index into string table)
91    pub name: String,
92    /// Section type
93    pub section_type: SectionType,
94    /// Section flags
95    pub flags: u32,
96    /// Virtual address
97    pub addr: u32,
98    /// Section data
99    pub data: Vec<u8>,
100    /// Alignment
101    pub align: u32,
102    /// Explicit size (for NoBits sections like .bss where data is empty)
103    pub explicit_size: Option<u32>,
104}
105
106impl Section {
107    /// Create a new section
108    pub fn new(name: &str, section_type: SectionType) -> Self {
109        Self {
110            name: name.to_string(),
111            section_type,
112            flags: 0,
113            addr: 0,
114            data: Vec::new(),
115            align: 1,
116            explicit_size: None,
117        }
118    }
119
120    /// Set flags
121    pub fn with_flags(mut self, flags: u32) -> Self {
122        self.flags = flags;
123        self
124    }
125
126    /// Set address
127    pub fn with_addr(mut self, addr: u32) -> Self {
128        self.addr = addr;
129        self
130    }
131
132    /// Set alignment
133    pub fn with_align(mut self, align: u32) -> Self {
134        self.align = align;
135        self
136    }
137
138    /// Add data
139    pub fn with_data(mut self, data: Vec<u8>) -> Self {
140        self.data = data;
141        self
142    }
143
144    /// Set explicit size (for NoBits sections like .bss where data is empty)
145    pub fn with_size(mut self, size: u32) -> Self {
146        self.explicit_size = Some(size);
147        self
148    }
149
150    /// Get the effective size of the section
151    pub fn size(&self) -> u32 {
152        self.explicit_size.unwrap_or(self.data.len() as u32)
153    }
154}
155
/// Symbol binding; the builder stores it in the upper four bits of `st_info`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SymbolBinding {
    /// Local symbol
    Local = 0,
    /// Global symbol
    Global = 1,
    /// Weak symbol
    Weak = 2,
}
166
/// Symbol type; the builder stores it in the lower four bits of `st_info`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SymbolType {
    /// Unspecified type
    NoType = 0,
    /// Data object
    Object = 1,
    /// Function
    Func = 2,
    /// Section
    Section = 3,
    /// Source file name
    File = 4,
}
181
182/// ELF symbol
183#[derive(Debug, Clone)]
184pub struct Symbol {
185    /// Symbol name
186    pub name: String,
187    /// Value/address
188    pub value: u32,
189    /// Size
190    pub size: u32,
191    /// Binding
192    pub binding: SymbolBinding,
193    /// Type
194    pub symbol_type: SymbolType,
195    /// Section index
196    pub section: u16,
197}
198
199impl Symbol {
200    /// Create a new symbol
201    pub fn new(name: &str) -> Self {
202        Self {
203            name: name.to_string(),
204            value: 0,
205            size: 0,
206            binding: SymbolBinding::Local,
207            symbol_type: SymbolType::NoType,
208            section: 0,
209        }
210    }
211
212    /// Set value
213    pub fn with_value(mut self, value: u32) -> Self {
214        self.value = value;
215        self
216    }
217
218    /// Set size
219    pub fn with_size(mut self, size: u32) -> Self {
220        self.size = size;
221        self
222    }
223
224    /// Set binding
225    pub fn with_binding(mut self, binding: SymbolBinding) -> Self {
226        self.binding = binding;
227        self
228    }
229
230    /// Set type
231    pub fn with_type(mut self, symbol_type: SymbolType) -> Self {
232        self.symbol_type = symbol_type;
233        self
234    }
235
236    /// Set section
237    pub fn with_section(mut self, section: u16) -> Self {
238        self.section = section;
239        self
240    }
241}
242
/// Segment type; the discriminant is the value stored in `p_type`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProgramType {
    /// Unused entry
    Null = 0,
    /// Loadable segment
    Load = 1,
    /// Dynamic linking information
    Dynamic = 2,
    /// Interpreter path
    Interp = 3,
    /// Note segment
    Note = 4,
}
257
/// Namespace for segment permission bits (OR them together for `p_flags`).
pub struct ProgramFlags;

impl ProgramFlags {
    /// Segment is executable
    pub const EXEC: u32 = 1 << 0;
    /// Segment is writable
    pub const WRITE: u32 = 1 << 1;
    /// Segment is readable
    pub const READ: u32 = 1 << 2;
}
269
270/// ELF program header (segment)
271#[derive(Debug, Clone)]
272pub struct ProgramHeader {
273    /// Segment type
274    pub p_type: ProgramType,
275    /// Offset in file
276    pub offset: u32,
277    /// Virtual address
278    pub vaddr: u32,
279    /// Physical address
280    pub paddr: u32,
281    /// Size in file
282    pub filesz: u32,
283    /// Size in memory
284    pub memsz: u32,
285    /// Flags (R/W/X)
286    pub flags: u32,
287    /// Alignment
288    pub align: u32,
289}
290
291impl ProgramHeader {
292    /// Create a new LOAD segment
293    pub fn load(vaddr: u32, offset: u32, size: u32, flags: u32) -> Self {
294        Self {
295            p_type: ProgramType::Load,
296            offset,
297            vaddr,
298            paddr: vaddr, // Physical = virtual for simple cases
299            filesz: size,
300            memsz: size,
301            flags,
302            align: 4,
303        }
304    }
305
306    /// Create a new LOAD segment for BSS-like regions (no file data, only memory)
307    /// Used for .bss, linear memory, and other zero-initialized regions
308    pub fn load_nobits(vaddr: u32, memsz: u32, flags: u32) -> Self {
309        Self {
310            p_type: ProgramType::Load,
311            offset: 0, // No file offset for NoBits
312            vaddr,
313            paddr: vaddr, // Physical = virtual
314            filesz: 0,    // No file data
315            memsz,        // Memory size to allocate
316            flags,
317            align: 4,
318        }
319    }
320}
321
/// ARM relocation kinds; the discriminant is the code placed in the low byte
/// of a relocation entry's `r_info`.
///
/// NOTE(review): the file header says the target is Cortex-M (Thumb-only), yet
/// these are the ARM-state relocation codes. Confirm against the instruction
/// emitter whether the `R_ARM_THM_*` variants are needed instead.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ArmRelocationType {
    /// `R_ARM_ABS32` (2): direct 32-bit reference
    Abs32 = 2,
    /// `R_ARM_CALL` (28): `BL`/`BLX` instruction
    Call = 28,
    /// `R_ARM_JUMP24` (29): `B`/`BL<cond>` instruction
    Jump24 = 29,
    /// `R_ARM_MOVW_ABS_NC` (43): `MOVW` instruction (low 16 bits)
    MovwAbsNc = 43,
    /// `R_ARM_MOVT_ABS` (44): `MOVT` instruction (high 16 bits)
    MovtAbs = 44,
}
336
337/// ELF relocation entry (REL format, no addend)
338#[derive(Debug, Clone)]
339pub struct Relocation {
340    /// Offset within the section where the relocation applies
341    pub offset: u32,
342    /// Symbol index in the symbol table
343    pub symbol_index: u32,
344    /// Relocation type
345    pub reloc_type: ArmRelocationType,
346}
347
/// ARM EABI version 5 marker for `e_flags` (the builder's default flags value)
pub const EF_ARM_EABI_VER5: u32 = 0x0500_0000;
/// `e_flags` bit selecting the hard-float ABI
pub const EF_ARM_ABI_FLOAT_HARD: u32 = 0x0000_0400;
/// `e_flags` bit selecting the soft-float ABI
pub const EF_ARM_ABI_FLOAT_SOFT: u32 = 0x0000_0200;
354
355/// ELF file builder
356pub struct ElfBuilder {
357    /// File class (32 or 64 bit)
358    class: ElfClass,
359    /// Data encoding
360    data: ElfData,
361    /// File type
362    elf_type: ElfType,
363    /// Machine architecture
364    machine: ElfMachine,
365    /// Entry point address
366    entry: u32,
367    /// ELF e_flags (EABI version + float ABI)
368    e_flags: u32,
369    /// Sections
370    sections: Vec<Section>,
371    /// Symbols
372    symbols: Vec<Symbol>,
373    /// Program headers (segments)
374    program_headers: Vec<ProgramHeader>,
375    /// Relocations for .text section
376    relocations: Vec<Relocation>,
377}
378
379impl ElfBuilder {
380    /// Create a new ELF builder for ARM32
381    pub fn new_arm32() -> Self {
382        Self {
383            class: ElfClass::Elf32,
384            data: ElfData::LittleEndian,
385            elf_type: ElfType::Exec,
386            machine: ElfMachine::Arm,
387            entry: 0,
388            e_flags: EF_ARM_EABI_VER5,
389            sections: Vec::new(),
390            symbols: Vec::new(),
391            program_headers: Vec::new(),
392            relocations: Vec::new(),
393        }
394    }
395
396    /// Set entry point
397    ///
398    /// For ARM (Thumb) targets, bit 0 is automatically set to indicate Thumb mode.
399    /// Cortex-M is Thumb-only, so function addresses in ELF must have bit 0 set.
400    pub fn with_entry(mut self, entry: u32) -> Self {
401        self.entry = if self.machine == ElfMachine::Arm {
402            entry | 1 // Set Thumb bit for ARM targets
403        } else {
404            entry
405        };
406        self
407    }
408
409    /// Set ELF e_flags (e.g. to add hard-float ABI)
410    pub fn set_flags(&mut self, flags: u32) {
411        self.e_flags = flags;
412    }
413
414    /// Set file type
415    pub fn with_type(mut self, elf_type: ElfType) -> Self {
416        self.elf_type = elf_type;
417        self
418    }
419
420    /// Add a section
421    pub fn add_section(&mut self, section: Section) {
422        self.sections.push(section);
423    }
424
425    /// Add a symbol
426    pub fn add_symbol(&mut self, symbol: Symbol) {
427        self.symbols.push(symbol);
428    }
429
430    /// Add a program header (segment)
431    pub fn add_program_header(&mut self, ph: ProgramHeader) {
432        self.program_headers.push(ph);
433    }
434
435    /// Add a relocation entry for the .text section
436    pub fn add_relocation(&mut self, reloc: Relocation) {
437        self.relocations.push(reloc);
438    }
439
440    /// Add an undefined external symbol (e.g., __meld_dispatch_import)
441    /// Returns the symbol index (1-based, accounting for null symbol)
442    pub fn add_undefined_symbol(&mut self, name: &str) -> u32 {
443        let index = self.symbols.len() as u32 + 1; // +1 for null symbol
444        self.symbols.push(Symbol {
445            name: name.to_string(),
446            value: 0,
447            size: 0,
448            binding: SymbolBinding::Global,
449            symbol_type: SymbolType::Func,
450            section: 0, // SHN_UNDEF
451        });
452        index
453    }
454
455    /// Build the ELF file to bytes
456    pub fn build(&self) -> Result<Vec<u8>> {
457        let mut output = Vec::new();
458
459        // ELF header size (52 bytes for ELF32)
460        let header_size = 52;
461        // Program header size (32 bytes for ELF32)
462        let ph_entry_size = 32;
463        let ph_count = self.program_headers.len();
464        let ph_table_size = ph_entry_size * ph_count;
465
466        // Reserve space for ELF header + program headers
467        output.resize(header_size + ph_table_size, 0);
468
469        // Build string table for section names
470        let (shstrtab_data, section_name_offsets) = self.build_section_string_table();
471
472        // Build symbol string table
473        let (strtab_data, symbol_name_offsets) = self.build_symbol_string_table();
474
475        // Calculate section offsets (after ELF header + program headers)
476        let mut current_offset = header_size + ph_table_size;
477
478        // Section 1: .shstrtab (section name string table)
479        let shstrtab_offset = current_offset;
480        current_offset += shstrtab_data.len();
481
482        // Section 2: .strtab (symbol name string table)
483        let strtab_offset = current_offset;
484        current_offset += strtab_data.len();
485
486        // User sections
487        let mut section_offsets = Vec::new();
488        for section in &self.sections {
489            section_offsets.push(current_offset);
490            current_offset += section.data.len();
491        }
492
493        // Section 3: .symtab (symbol table)
494        let symtab_offset = current_offset;
495        let symtab_data = self.build_symbol_table(&symbol_name_offsets);
496        current_offset += symtab_data.len();
497
498        // Section 4+ (optional): .rel.text (relocations)
499        let rel_data = self.build_relocation_table();
500        let rel_offset = current_offset;
501        current_offset += rel_data.len();
502
503        // Section header table comes at the end
504        let sh_offset = current_offset;
505
506        // Now write all the data
507        output.extend_from_slice(&shstrtab_data);
508        output.extend_from_slice(&strtab_data);
509
510        for section in &self.sections {
511            output.extend_from_slice(&section.data);
512        }
513
514        output.extend_from_slice(&symtab_data);
515        output.extend_from_slice(&rel_data);
516
517        // Write section headers
518        let section_headers = self.build_section_headers_with_rel(
519            &section_name_offsets,
520            shstrtab_offset,
521            &shstrtab_data,
522            strtab_offset,
523            &strtab_data,
524            symtab_offset,
525            &symtab_data,
526            &section_offsets,
527            rel_offset,
528            &rel_data,
529        );
530        output.extend_from_slice(&section_headers);
531
532        // Write program headers (right after ELF header)
533        // Auto-correct p_offset for LOAD segments by matching vaddr to section addrs
534        for (i, ph) in self.program_headers.iter().enumerate() {
535            let ph_offset = header_size + i * ph_entry_size;
536            let mut corrected_ph = ph.clone();
537            if corrected_ph.filesz > 0 {
538                // Find the section whose addr matches this segment's vaddr
539                for (si, section) in self.sections.iter().enumerate() {
540                    if section.addr == corrected_ph.vaddr && si < section_offsets.len() {
541                        corrected_ph.offset = section_offsets[si] as u32;
542                        break;
543                    }
544                }
545            }
546            self.write_program_header(
547                &mut output[ph_offset..ph_offset + ph_entry_size],
548                &corrected_ph,
549            );
550        }
551
552        // Now write the actual ELF header at the beginning
553        let has_rel = !self.relocations.is_empty();
554        let num_sections = 4 + self.sections.len() + if has_rel { 1 } else { 0 };
555        let ph_offset = if ph_count > 0 { header_size as u32 } else { 0 };
556        self.write_elf_header_with_phdrs(
557            &mut output[0..header_size],
558            ph_offset,
559            ph_count as u16,
560            sh_offset as u32,
561            num_sections as u16,
562        )?;
563
564        Ok(output)
565    }
566
567    /// Write a single program header
568    fn write_program_header(&self, output: &mut [u8], ph: &ProgramHeader) {
569        let mut cursor = 0;
570
571        // p_type (4 bytes)
572        output[cursor..cursor + 4].copy_from_slice(&(ph.p_type as u32).to_le_bytes());
573        cursor += 4;
574
575        // p_offset (4 bytes)
576        output[cursor..cursor + 4].copy_from_slice(&ph.offset.to_le_bytes());
577        cursor += 4;
578
579        // p_vaddr (4 bytes)
580        output[cursor..cursor + 4].copy_from_slice(&ph.vaddr.to_le_bytes());
581        cursor += 4;
582
583        // p_paddr (4 bytes)
584        output[cursor..cursor + 4].copy_from_slice(&ph.paddr.to_le_bytes());
585        cursor += 4;
586
587        // p_filesz (4 bytes)
588        output[cursor..cursor + 4].copy_from_slice(&ph.filesz.to_le_bytes());
589        cursor += 4;
590
591        // p_memsz (4 bytes)
592        output[cursor..cursor + 4].copy_from_slice(&ph.memsz.to_le_bytes());
593        cursor += 4;
594
595        // p_flags (4 bytes)
596        output[cursor..cursor + 4].copy_from_slice(&ph.flags.to_le_bytes());
597        cursor += 4;
598
599        // p_align (4 bytes)
600        output[cursor..cursor + 4].copy_from_slice(&ph.align.to_le_bytes());
601    }
602
603    /// Write ELF header with program header info
604    fn write_elf_header_with_phdrs(
605        &self,
606        output: &mut [u8],
607        ph_offset: u32,
608        ph_count: u16,
609        sh_offset: u32,
610        sh_count: u16,
611    ) -> Result<()> {
612        let mut cursor = 0;
613
614        // ELF magic number
615        output[cursor..cursor + 4].copy_from_slice(&[0x7f, b'E', b'L', b'F']);
616        cursor += 4;
617
618        // Class (32-bit)
619        output[cursor] = self.class as u8;
620        cursor += 1;
621
622        // Data (little-endian)
623        output[cursor] = self.data as u8;
624        cursor += 1;
625
626        // Version
627        output[cursor] = 1;
628        cursor += 1;
629
630        // OS/ABI
631        output[cursor] = 0; // System V
632        cursor += 1;
633
634        // ABI version
635        output[cursor] = 0;
636        cursor += 1;
637
638        // Padding (7 bytes)
639        output[cursor..cursor + 7].copy_from_slice(&[0; 7]);
640        cursor += 7;
641
642        // Type (little-endian u16)
643        let etype = self.elf_type as u16;
644        output[cursor..cursor + 2].copy_from_slice(&etype.to_le_bytes());
645        cursor += 2;
646
647        // Machine (little-endian u16)
648        let machine = self.machine as u16;
649        output[cursor..cursor + 2].copy_from_slice(&machine.to_le_bytes());
650        cursor += 2;
651
652        // Version (little-endian u32)
653        output[cursor..cursor + 4].copy_from_slice(&1u32.to_le_bytes());
654        cursor += 4;
655
656        // Entry point (little-endian u32)
657        output[cursor..cursor + 4].copy_from_slice(&self.entry.to_le_bytes());
658        cursor += 4;
659
660        // Program header offset (little-endian u32)
661        output[cursor..cursor + 4].copy_from_slice(&ph_offset.to_le_bytes());
662        cursor += 4;
663
664        // Section header offset (little-endian u32)
665        output[cursor..cursor + 4].copy_from_slice(&sh_offset.to_le_bytes());
666        cursor += 4;
667
668        // Flags (little-endian u32) - ARM EABI version 5 + float ABI
669        output[cursor..cursor + 4].copy_from_slice(&self.e_flags.to_le_bytes());
670        cursor += 4;
671
672        // ELF header size (little-endian u16)
673        output[cursor..cursor + 2].copy_from_slice(&52u16.to_le_bytes());
674        cursor += 2;
675
676        // Program header entry size (little-endian u16)
677        let ph_entry_size: u16 = if ph_count > 0 { 32 } else { 0 };
678        output[cursor..cursor + 2].copy_from_slice(&ph_entry_size.to_le_bytes());
679        cursor += 2;
680
681        // Program header count (little-endian u16)
682        output[cursor..cursor + 2].copy_from_slice(&ph_count.to_le_bytes());
683        cursor += 2;
684
685        // Section header entry size (little-endian u16)
686        output[cursor..cursor + 2].copy_from_slice(&40u16.to_le_bytes());
687        cursor += 2;
688
689        // Section header count (little-endian u16)
690        output[cursor..cursor + 2].copy_from_slice(&sh_count.to_le_bytes());
691        cursor += 2;
692
693        // Section header string table index (little-endian u16) - .shstrtab is section 1
694        output[cursor..cursor + 2].copy_from_slice(&1u16.to_le_bytes());
695
696        Ok(())
697    }
698
699    /// Build section name string table
700    fn build_section_string_table(&self) -> (Vec<u8>, Vec<usize>) {
701        let mut strtab = vec![0]; // null string at offset 0
702        let mut offsets = Vec::new();
703
704        // Standard sections
705        strtab.extend_from_slice(b".shstrtab\0");
706        strtab.extend_from_slice(b".strtab\0");
707        strtab.extend_from_slice(b".symtab\0");
708
709        // User sections
710        for section in &self.sections {
711            let offset = strtab.len();
712            offsets.push(offset);
713            strtab.extend_from_slice(section.name.as_bytes());
714            strtab.push(0);
715        }
716
717        // .rel.text (if relocations exist)
718        if !self.relocations.is_empty() {
719            strtab.extend_from_slice(b".rel.text\0");
720        }
721
722        (strtab, offsets)
723    }
724
725    /// Build symbol name string table
726    fn build_symbol_string_table(&self) -> (Vec<u8>, Vec<usize>) {
727        let mut strtab = vec![0]; // null string at offset 0
728        let mut offsets = Vec::new();
729
730        for symbol in &self.symbols {
731            let offset = strtab.len();
732            offsets.push(offset);
733            strtab.extend_from_slice(symbol.name.as_bytes());
734            strtab.push(0);
735        }
736
737        (strtab, offsets)
738    }
739
740    /// Build relocation table (ELF32 REL entries: 8 bytes each)
741    fn build_relocation_table(&self) -> Vec<u8> {
742        if self.relocations.is_empty() {
743            return Vec::new();
744        }
745
746        let mut rel_data = Vec::new();
747        for reloc in &self.relocations {
748            // r_offset (4 bytes)
749            rel_data.extend_from_slice(&reloc.offset.to_le_bytes());
750            // r_info (4 bytes) = (sym_index << 8) | type
751            let r_info = (reloc.symbol_index << 8) | (reloc.reloc_type as u32);
752            rel_data.extend_from_slice(&r_info.to_le_bytes());
753        }
754        rel_data
755    }
756
757    /// Build symbol table
758    fn build_symbol_table(&self, name_offsets: &[usize]) -> Vec<u8> {
759        let mut symtab = Vec::new();
760
761        // First entry is always null symbol
762        symtab.extend_from_slice(&[0u8; 16]); // 16 bytes per symbol in ELF32
763
764        // User symbols
765        for (i, symbol) in self.symbols.iter().enumerate() {
766            let name_offset = if i < name_offsets.len() {
767                name_offsets[i] as u32
768            } else {
769                0
770            };
771
772            // st_name (4 bytes)
773            symtab.extend_from_slice(&name_offset.to_le_bytes());
774
775            // st_value (4 bytes)
776            // For ARM targets, STT_FUNC symbols must have bit 0 set (Thumb interworking)
777            let value = if self.machine == ElfMachine::Arm && symbol.symbol_type == SymbolType::Func
778            {
779                symbol.value | 1
780            } else {
781                symbol.value
782            };
783            symtab.extend_from_slice(&value.to_le_bytes());
784
785            // st_size (4 bytes)
786            symtab.extend_from_slice(&symbol.size.to_le_bytes());
787
788            // st_info (1 byte) = (binding << 4) | (type & 0xf)
789            let info = ((symbol.binding as u8) << 4) | (symbol.symbol_type as u8 & 0xf);
790            symtab.push(info);
791
792            // st_other (1 byte)
793            symtab.push(0);
794
795            // st_shndx (2 bytes)
796            symtab.extend_from_slice(&symbol.section.to_le_bytes());
797        }
798
799        symtab
800    }
801
802    /// Build section headers (with optional .rel.text)
803    #[allow(clippy::too_many_arguments)]
804    fn build_section_headers_with_rel(
805        &self,
806        section_name_offsets: &[usize],
807        shstrtab_offset: usize,
808        shstrtab_data: &[u8],
809        strtab_offset: usize,
810        strtab_data: &[u8],
811        symtab_offset: usize,
812        symtab_data: &[u8],
813        section_offsets: &[usize],
814        rel_offset: usize,
815        rel_data: &[u8],
816    ) -> Vec<u8> {
817        let mut headers = Vec::new();
818
819        // Section header size is 40 bytes for ELF32
820
821        // Section 0: null section
822        headers.extend_from_slice(&[0u8; 40]);
823
824        // Section 1: .shstrtab
825        self.write_section_header(
826            &mut headers,
827            1,
828            SectionType::StrTab as u32,
829            0,
830            0,
831            shstrtab_offset as u32,
832            shstrtab_data.len() as u32,
833            0,
834            0,
835            1,
836            0,
837        );
838
839        // Section 2: .strtab
840        let strtab_name_offset = ".shstrtab\0".len();
841        self.write_section_header(
842            &mut headers,
843            strtab_name_offset as u32,
844            SectionType::StrTab as u32,
845            0,
846            0,
847            strtab_offset as u32,
848            strtab_data.len() as u32,
849            0,
850            0,
851            1,
852            0,
853        );
854
855        // Section 3: .symtab (links to .strtab which is section 2)
856        let symtab_name_offset = ".shstrtab\0.strtab\0".len();
857        self.write_section_header(
858            &mut headers,
859            symtab_name_offset as u32,
860            SectionType::SymTab as u32,
861            0,
862            0,
863            symtab_offset as u32,
864            symtab_data.len() as u32,
865            2,
866            1,
867            4,
868            16,
869        );
870
871        // User sections
872        for (i, section) in self.sections.iter().enumerate() {
873            let name_offset = if i < section_name_offsets.len() {
874                section_name_offsets[i] as u32
875            } else {
876                0
877            };
878            let offset = if i < section_offsets.len() {
879                section_offsets[i] as u32
880            } else {
881                0
882            };
883
884            self.write_section_header(
885                &mut headers,
886                name_offset,
887                section.section_type as u32,
888                section.flags,
889                section.addr,
890                offset,
891                section.size(),
892                0,
893                0,
894                section.align,
895                0,
896            );
897        }
898
899        // .rel.text section (if relocations exist)
900        if !rel_data.is_empty() {
901            let rel_name_offset = self.rel_text_shstrtab_offset();
902            // sh_link = symtab section index (3), sh_info = .text section index (4, first user section)
903            let text_section_idx = 4u32; // null(0) + shstrtab(1) + strtab(2) + symtab(3) + .text(4)
904            self.write_section_header(
905                &mut headers,
906                rel_name_offset as u32,
907                SectionType::Rel as u32,
908                0,
909                0,
910                rel_offset as u32,
911                rel_data.len() as u32,
912                3,                // sh_link = .symtab section index
913                text_section_idx, // sh_info = section to which relocations apply
914                4,
915                8, // Each REL entry is 8 bytes
916            );
917        }
918
919        headers
920    }
921
922    /// Compute the shstrtab offset where .rel.text name begins
923    fn rel_text_shstrtab_offset(&self) -> usize {
924        // Layout: \0 .shstrtab\0 .strtab\0 .symtab\0 [user sections...] .rel.text\0
925        let mut offset = 1 + ".shstrtab\0".len() + ".strtab\0".len() + ".symtab\0".len();
926        for section in &self.sections {
927            offset += section.name.len() + 1;
928        }
929        offset
930    }
931
932    /// Write a single section header
933    #[allow(clippy::too_many_arguments)]
934    fn write_section_header(
935        &self,
936        output: &mut Vec<u8>,
937        name: u32,
938        sh_type: u32,
939        flags: u32,
940        addr: u32,
941        offset: u32,
942        size: u32,
943        link: u32,
944        info: u32,
945        align: u32,
946        entsize: u32,
947    ) {
948        output.extend_from_slice(&name.to_le_bytes());
949        output.extend_from_slice(&sh_type.to_le_bytes());
950        output.extend_from_slice(&flags.to_le_bytes());
951        output.extend_from_slice(&addr.to_le_bytes());
952        output.extend_from_slice(&offset.to_le_bytes());
953        output.extend_from_slice(&size.to_le_bytes());
954        output.extend_from_slice(&link.to_le_bytes());
955        output.extend_from_slice(&info.to_le_bytes());
956        output.extend_from_slice(&align.to_le_bytes());
957        output.extend_from_slice(&entsize.to_le_bytes());
958    }
959
960    /// Write ELF header (legacy method for tests)
961    #[allow(dead_code)]
962    fn write_elf_header(&self, output: &mut Vec<u8>) -> Result<()> {
963        // ELF magic number
964        output.extend_from_slice(&[0x7f, b'E', b'L', b'F']);
965
966        // Class (32-bit)
967        output.push(self.class as u8);
968
969        // Data (little-endian)
970        output.push(self.data as u8);
971
972        // Version
973        output.push(1);
974
975        // OS/ABI
976        output.push(0); // System V
977
978        // ABI version
979        output.push(0);
980
981        // Padding
982        output.extend_from_slice(&[0; 7]);
983
984        // Type (little-endian u16)
985        let etype = self.elf_type as u16;
986        output.extend_from_slice(&etype.to_le_bytes());
987
988        // Machine (little-endian u16)
989        let machine = self.machine as u16;
990        output.extend_from_slice(&machine.to_le_bytes());
991
992        // Version (little-endian u32)
993        output.extend_from_slice(&1u32.to_le_bytes());
994
995        // Entry point (little-endian u32)
996        output.extend_from_slice(&self.entry.to_le_bytes());
997
998        // Program header offset (little-endian u32)
999        output.extend_from_slice(&0u32.to_le_bytes());
1000
1001        // Section header offset (little-endian u32)
1002        output.extend_from_slice(&0u32.to_le_bytes());
1003
1004        // Flags (little-endian u32)
1005        output.extend_from_slice(&0u32.to_le_bytes());
1006
1007        // ELF header size (little-endian u16)
1008        output.extend_from_slice(&52u16.to_le_bytes());
1009
1010        // Program header entry size (little-endian u16)
1011        output.extend_from_slice(&0u16.to_le_bytes());
1012
1013        // Program header count (little-endian u16)
1014        output.extend_from_slice(&0u16.to_le_bytes());
1015
1016        // Section header entry size (little-endian u16)
1017        output.extend_from_slice(&40u16.to_le_bytes());
1018
1019        // Section header count (little-endian u16)
1020        output.extend_from_slice(&0u16.to_le_bytes());
1021
1022        // Section header string table index (little-endian u16)
1023        output.extend_from_slice(&0u16.to_le_bytes());
1024
1025        Ok(())
1026    }
1027}
1028
#[cfg(test)]
mod tests {
    use super::*;

    /// Magic bytes expected at the very start of every generated image.
    const ELF_MAGIC: [u8; 4] = [0x7f, b'E', b'L', b'F'];

    /// Decode a little-endian `u16` from the first two bytes of `bytes`.
    fn read_u16_le(bytes: &[u8]) -> u16 {
        u16::from_le_bytes([bytes[0], bytes[1]])
    }

    /// Decode a little-endian `u32` from the first four bytes of `bytes`.
    fn read_u32_le(bytes: &[u8]) -> u32 {
        u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
    }

    /// A fresh ARM32 builder reports 32-bit class, little-endian data and the ARM machine.
    #[test]
    fn test_elf_builder_creation() {
        let arm32 = ElfBuilder::new_arm32();

        assert_eq!(arm32.class, ElfClass::Elf32);
        assert_eq!(arm32.data, ElfData::LittleEndian);
        assert_eq!(arm32.machine, ElfMachine::Arm);
    }

    /// The section builder methods store name, type, address and alignment.
    #[test]
    fn test_section_creation() {
        let text = Section::new(".text", SectionType::ProgBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::EXEC)
            .with_addr(0x8000)
            .with_align(4);

        assert_eq!(text.name, ".text");
        assert_eq!(text.section_type, SectionType::ProgBits);
        assert_eq!(text.addr, 0x8000);
        assert_eq!(text.align, 4);
    }

    /// The symbol builder methods store name, value, size, binding and type.
    #[test]
    fn test_symbol_creation() {
        let main_fn = Symbol::new("main")
            .with_value(0x8000)
            .with_size(128)
            .with_binding(SymbolBinding::Global)
            .with_type(SymbolType::Func)
            .with_section(1);

        assert_eq!(main_fn.name, "main");
        assert_eq!(main_fn.value, 0x8000);
        assert_eq!(main_fn.size, 128);
        assert_eq!(main_fn.binding, SymbolBinding::Global);
        assert_eq!(main_fn.symbol_type, SymbolType::Func);
    }

    /// An otherwise empty builder still produces a well-formed identification block.
    #[test]
    fn test_elf_header_generation() {
        let image = ElfBuilder::new_arm32()
            .with_entry(0x8000)
            .build()
            .unwrap();

        // Magic number
        assert_eq!(&image[0..4], &ELF_MAGIC);

        // Class byte: 32-bit
        assert_eq!(image[4], 1);

        // Data byte: little-endian
        assert_eq!(image[5], 1);

        // Identification version
        assert_eq!(image[6], 1);
    }

    /// Every section handed to `add_section` is retained by the builder.
    #[test]
    fn test_add_sections() {
        let mut builder = ElfBuilder::new_arm32();

        let code_section = Section::new(".text", SectionType::ProgBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::EXEC);
        let data_section = Section::new(".data", SectionType::ProgBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::WRITE);

        builder.add_section(code_section);
        builder.add_section(data_section);

        assert_eq!(builder.sections.len(), 2);
    }

    /// Every symbol handed to `add_symbol` is retained by the builder.
    #[test]
    fn test_add_symbols() {
        let mut builder = ElfBuilder::new_arm32();

        let global_func = Symbol::new("main")
            .with_binding(SymbolBinding::Global)
            .with_type(SymbolType::Func);
        let local_object = Symbol::new("data")
            .with_binding(SymbolBinding::Local)
            .with_type(SymbolType::Object);

        builder.add_symbol(global_func);
        builder.add_symbol(local_object);

        assert_eq!(builder.symbols.len(), 2);
    }

    /// End-to-end build of an executable with three sections and two symbols,
    /// followed by spot checks of the header fields in the produced image.
    #[test]
    fn test_complete_elf_generation() {
        let mut builder = ElfBuilder::new_arm32()
            .with_entry(0x8000)
            .with_type(ElfType::Exec);

        // .text: four ARM instructions
        let code_bytes = vec![
            0x00, 0x48, 0x2d, 0xe9, // push {fp, lr}
            0x04, 0xb0, 0x8d, 0xe2, // add fp, sp, #4
            0x00, 0x00, 0xa0, 0xe3, // mov r0, #0
            0x00, 0x88, 0xbd, 0xe8, // pop {fp, pc}
        ];
        builder.add_section(
            Section::new(".text", SectionType::ProgBits)
                .with_flags(SectionFlags::ALLOC | SectionFlags::EXEC)
                .with_addr(0x8000)
                .with_align(4)
                .with_data(code_bytes),
        );

        // .data: four initialised bytes
        let initialised = vec![0x01, 0x02, 0x03, 0x04];
        builder.add_section(
            Section::new(".data", SectionType::ProgBits)
                .with_flags(SectionFlags::ALLOC | SectionFlags::WRITE)
                .with_addr(0x8100)
                .with_align(4)
                .with_data(initialised),
        );

        // .bss: NoBits section, so no data is attached
        builder.add_section(
            Section::new(".bss", SectionType::NoBits)
                .with_flags(SectionFlags::ALLOC | SectionFlags::WRITE)
                .with_addr(0x8200)
                .with_align(4),
        );

        // Symbols. Section numbering used here:
        // 0=null, 1=shstrtab, 2=strtab, 3=symtab, 4=.text, 5=.data
        builder.add_symbol(
            Symbol::new("main")
                .with_value(0x8000)
                .with_size(16)
                .with_binding(SymbolBinding::Global)
                .with_type(SymbolType::Func)
                .with_section(4),
        );
        builder.add_symbol(
            Symbol::new("global_var")
                .with_value(0x8100)
                .with_size(4)
                .with_binding(SymbolBinding::Global)
                .with_type(SymbolType::Object)
                .with_section(5),
        );

        let image = builder.build().unwrap();

        // Identification block
        assert_eq!(&image[0..4], &ELF_MAGIC);
        assert_eq!(image[4], 1); // 32-bit
        assert_eq!(image[5], 1); // little-endian
        assert_eq!(image[6], 1); // version

        // Sanity bounds on the overall size
        assert!(image.len() > 52); // At least header size
        assert!(image.len() < 10000); // Reasonable upper bound

        // Entry point at bytes 24..28: the requested 0x8000 with the Thumb bit set
        let entry_point = read_u32_le(&image[24..28]);
        assert_eq!(entry_point, 0x8001); // 0x8000 | 1 (Thumb bit)

        // Section header offset at bytes 32..36 must have been filled in
        let section_header_offset = read_u32_le(&image[32..36]);
        assert!(section_header_offset > 0);

        // Section count at bytes 48..50:
        // null + shstrtab + strtab + symtab + .text + .data + .bss = 7
        let section_count = read_u16_le(&image[48..50]);
        assert_eq!(section_count, 7);

        // String table index at bytes 50..52 points to .shstrtab (section 1)
        let string_table_index = read_u16_le(&image[50..52]);
        assert_eq!(string_table_index, 1);
    }

    /// The section string table starts with a NUL byte and holds the names of
    /// the built-in tables as well as the user sections.
    #[test]
    fn test_string_table_generation() {
        let mut builder = ElfBuilder::new_arm32();
        builder.add_section(Section::new(".text", SectionType::ProgBits));
        builder.add_section(Section::new(".data", SectionType::ProgBits));

        let (table, name_offsets) = builder.build_section_string_table();

        // Leading NUL byte
        assert_eq!(table[0], 0);

        // Names that must be present in the table
        let table_text = String::from_utf8_lossy(&table);
        assert!(table_text.contains(".shstrtab"));
        assert!(table_text.contains(".strtab"));
        assert!(table_text.contains(".symtab"));
        assert!(table_text.contains(".text"));
        assert!(table_text.contains(".data"));

        // One offset per user section
        assert_eq!(name_offsets.len(), 2);
    }

    /// A relocatable build with one relocation gains an extra section and
    /// carries the name of the undefined symbol in its output.
    #[test]
    fn test_relocation_support() {
        let mut builder = ElfBuilder::new_arm32()
            .with_entry(0x8000)
            .with_type(ElfType::Rel);

        // .text filled with 16 placeholder bytes (room for four instructions)
        let placeholder = vec![0x00u8; 16];
        builder.add_section(
            Section::new(".text", SectionType::ProgBits)
                .with_flags(SectionFlags::ALLOC | SectionFlags::EXEC)
                .with_addr(0x8000)
                .with_align(4)
                .with_data(placeholder),
        );

        // Undefined external symbol; its index must be non-zero
        let sym_idx = builder.add_undefined_symbol("__meld_dispatch_import");
        assert!(sym_idx > 0);

        // Call relocation against that symbol at offset 4
        builder.add_relocation(Relocation {
            offset: 4,
            symbol_index: sym_idx,
            reloc_type: ArmRelocationType::Call,
        });

        let image = builder.build().unwrap();

        // Still starts with the ELF magic
        assert_eq!(&image[0..4], &ELF_MAGIC);

        // Section count should include .rel.text:
        // null(1) + shstrtab(1) + strtab(1) + symtab(1) + .text(1) + .rel.text(1) = 6
        let section_count = read_u16_le(&image[48..50]);
        assert_eq!(section_count, 6);

        // The symbol name must appear somewhere in the produced bytes
        let needle = b"__meld_dispatch_import";
        let name_present = image
            .windows(needle.len())
            .any(|window| window == needle);
        assert!(
            name_present,
            "ELF should contain __meld_dispatch_import symbol name"
        );
    }

    /// The symbol table is a zeroed null entry followed by one 16-byte entry
    /// per symbol, whose value, size and info fields are checked here.
    #[test]
    fn test_symbol_table_encoding() {
        let mut builder = ElfBuilder::new_arm32();
        builder.add_symbol(
            Symbol::new("test_func")
                .with_value(0x1000)
                .with_size(64)
                .with_binding(SymbolBinding::Global)
                .with_type(SymbolType::Func)
                .with_section(1),
        );

        let (_names, name_offsets) = builder.build_symbol_string_table();
        let symtab = builder.build_symbol_table(&name_offsets);

        // Null symbol (16 bytes) + 1 symbol (16 bytes) = 32 bytes
        assert_eq!(symtab.len(), 32);

        // The null symbol is all zeros
        assert!(symtab[..16].iter().all(|byte| *byte == 0));

        // Everything below inspects the second entry
        let second = &symtab[16..];

        // st_value (bytes 4-7 of the entry): 0x1000 with bit 0 set, which the
        // builder applies to function symbols for Thumb interworking
        let st_value = read_u32_le(&second[4..8]);
        assert_eq!(st_value, 0x1001); // 0x1000 | 1 (Thumb bit)

        // st_size (bytes 8-11 of the entry)
        let st_size = read_u32_le(&second[8..12]);
        assert_eq!(st_size, 64);

        // st_info (byte 12 of the entry): binding in the high nibble, type in the low
        let st_info = second[12];
        assert_eq!(st_info >> 4, SymbolBinding::Global as u8);
        assert_eq!(st_info & 0xf, SymbolType::Func as u8);
    }
}