// synth_backend/elf_builder.rs

//! ELF (Executable and Linkable Format) builder for ARM.
//!
//! Generates ELF32 files for ARM Cortex-M targets.

use synth_core::Result;

/// ELF file class (word size of the object file).
///
/// The discriminant is the value stored in the class byte of the ELF
/// identification header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElfClass {
    /// 32-bit objects
    Elf32 = 1,
    /// 64-bit objects
    Elf64 = 2,
}

/// ELF data encoding (byte order).
///
/// The discriminant is the value stored in the data-encoding byte of the ELF
/// identification header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElfData {
    /// Little-endian
    LittleEndian = 1,
    /// Big-endian
    BigEndian = 2,
}

/// ELF file type.
///
/// The discriminant is the 16-bit value stored in the file-type field of the
/// ELF header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElfType {
    /// Relocatable file
    Rel = 1,
    /// Executable file
    Exec = 2,
    /// Shared object file
    Dyn = 3,
}

/// ELF machine architecture.
///
/// The discriminant is the 16-bit value stored in the machine field of the
/// ELF header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElfMachine {
    /// ARM
    Arm = 40,
    /// ARM64/AArch64
    AArch64 = 183,
}

/// Section type.
///
/// The discriminant is the 32-bit value stored in the type field of a section
/// header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionType {
    /// Null section
    Null = 0,
    /// Program data
    ProgBits = 1,
    /// Symbol table
    SymTab = 2,
    /// String table
    StrTab = 3,
    /// Relocation entries with addends
    Rela = 4,
    /// Symbol hash table
    Hash = 5,
    /// Dynamic linking information
    Dynamic = 6,
    /// Note
    Note = 7,
    /// No space in the file (BSS)
    NoBits = 8,
    /// Relocation entries without addends
    Rel = 9,
}

/// Section flags.
///
/// The associated constants are plain `u32` bit masks meant to be OR-ed
/// together; the tuple field holds such a combined mask.
#[derive(Debug, Clone, Copy)]
pub struct SectionFlags(pub u32);

impl SectionFlags {
    /// Writable
    pub const WRITE: u32 = 0x1;
    /// Occupies memory during execution
    pub const ALLOC: u32 = 0x2;
    /// Executable
    pub const EXEC: u32 = 0x4;
    /// Mergeable
    pub const MERGE: u32 = 0x10;
    /// Contains null-terminated strings
    pub const STRINGS: u32 = 0x20;
}

87/// ELF section
88#[derive(Debug, Clone)]
89pub struct Section {
90    /// Section name (index into string table)
91    pub name: String,
92    /// Section type
93    pub section_type: SectionType,
94    /// Section flags
95    pub flags: u32,
96    /// Virtual address
97    pub addr: u32,
98    /// Section data
99    pub data: Vec<u8>,
100    /// Alignment
101    pub align: u32,
102    /// Explicit size (for NoBits sections like .bss where data is empty)
103    pub explicit_size: Option<u32>,
104}
105
106impl Section {
107    /// Create a new section
108    pub fn new(name: &str, section_type: SectionType) -> Self {
109        Self {
110            name: name.to_string(),
111            section_type,
112            flags: 0,
113            addr: 0,
114            data: Vec::new(),
115            align: 1,
116            explicit_size: None,
117        }
118    }
119
120    /// Set flags
121    pub fn with_flags(mut self, flags: u32) -> Self {
122        self.flags = flags;
123        self
124    }
125
126    /// Set address
127    pub fn with_addr(mut self, addr: u32) -> Self {
128        self.addr = addr;
129        self
130    }
131
132    /// Set alignment
133    pub fn with_align(mut self, align: u32) -> Self {
134        self.align = align;
135        self
136    }
137
138    /// Add data
139    pub fn with_data(mut self, data: Vec<u8>) -> Self {
140        self.data = data;
141        self
142    }
143
144    /// Set explicit size (for NoBits sections like .bss where data is empty)
145    pub fn with_size(mut self, size: u32) -> Self {
146        self.explicit_size = Some(size);
147        self
148    }
149
150    /// Get the effective size of the section
151    pub fn size(&self) -> u32 {
152        self.explicit_size.unwrap_or(self.data.len() as u32)
153    }
154}
155
/// Symbol binding.
///
/// The discriminant becomes the upper four bits of the symbol's info byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SymbolBinding {
    /// Local symbol
    Local = 0,
    /// Global symbol
    Global = 1,
    /// Weak symbol
    Weak = 2,
}

/// Symbol type.
///
/// The discriminant becomes the lower four bits of the symbol's info byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SymbolType {
    /// No type
    NoType = 0,
    /// Object (data)
    Object = 1,
    /// Function
    Func = 2,
    /// Section
    Section = 3,
    /// File name
    File = 4,
}

182/// ELF symbol
183#[derive(Debug, Clone)]
184pub struct Symbol {
185    /// Symbol name
186    pub name: String,
187    /// Value/address
188    pub value: u32,
189    /// Size
190    pub size: u32,
191    /// Binding
192    pub binding: SymbolBinding,
193    /// Type
194    pub symbol_type: SymbolType,
195    /// Section index
196    pub section: u16,
197}
198
199impl Symbol {
200    /// Create a new symbol
201    pub fn new(name: &str) -> Self {
202        Self {
203            name: name.to_string(),
204            value: 0,
205            size: 0,
206            binding: SymbolBinding::Local,
207            symbol_type: SymbolType::NoType,
208            section: 0,
209        }
210    }
211
212    /// Set value
213    pub fn with_value(mut self, value: u32) -> Self {
214        self.value = value;
215        self
216    }
217
218    /// Set size
219    pub fn with_size(mut self, size: u32) -> Self {
220        self.size = size;
221        self
222    }
223
224    /// Set binding
225    pub fn with_binding(mut self, binding: SymbolBinding) -> Self {
226        self.binding = binding;
227        self
228    }
229
230    /// Set type
231    pub fn with_type(mut self, symbol_type: SymbolType) -> Self {
232        self.symbol_type = symbol_type;
233        self
234    }
235
236    /// Set section
237    pub fn with_section(mut self, section: u16) -> Self {
238        self.section = section;
239        self
240    }
241}
242
/// Program header (segment) type.
///
/// The discriminant is the 32-bit value stored in the type field of a program
/// header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProgramType {
    /// Null entry
    Null = 0,
    /// Loadable segment
    Load = 1,
    /// Dynamic linking info
    Dynamic = 2,
    /// Interpreter path
    Interp = 3,
    /// Note section
    Note = 4,
}

/// Program header flags.
///
/// Namespace for the `u32` permission bits that are OR-ed into a segment's
/// flags field.
pub struct ProgramFlags;

impl ProgramFlags {
    /// Executable
    pub const EXEC: u32 = 0x1;
    /// Writable
    pub const WRITE: u32 = 0x2;
    /// Readable
    pub const READ: u32 = 0x4;
}

270/// ELF program header (segment)
271#[derive(Debug, Clone)]
272pub struct ProgramHeader {
273    /// Segment type
274    pub p_type: ProgramType,
275    /// Offset in file
276    pub offset: u32,
277    /// Virtual address
278    pub vaddr: u32,
279    /// Physical address
280    pub paddr: u32,
281    /// Size in file
282    pub filesz: u32,
283    /// Size in memory
284    pub memsz: u32,
285    /// Flags (R/W/X)
286    pub flags: u32,
287    /// Alignment
288    pub align: u32,
289}
290
291impl ProgramHeader {
292    /// Create a new LOAD segment
293    pub fn load(vaddr: u32, offset: u32, size: u32, flags: u32) -> Self {
294        Self {
295            p_type: ProgramType::Load,
296            offset,
297            vaddr,
298            paddr: vaddr, // Physical = virtual for simple cases
299            filesz: size,
300            memsz: size,
301            flags,
302            align: 4,
303        }
304    }
305
306    /// Create a new LOAD segment for BSS-like regions (no file data, only memory)
307    /// Used for .bss, linear memory, and other zero-initialized regions
308    pub fn load_nobits(vaddr: u32, memsz: u32, flags: u32) -> Self {
309        Self {
310            p_type: ProgramType::Load,
311            offset: 0, // No file offset for NoBits
312            vaddr,
313            paddr: vaddr, // Physical = virtual
314            filesz: 0,    // No file data
315            memsz,        // Memory size to allocate
316            flags,
317            align: 4,
318        }
319    }
320}
321
/// ARM relocation type.
///
/// The discriminant is the relocation number placed in the low byte of a
/// relocation entry's info word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArmRelocationType {
    /// R_ARM_ABS32 (2) — Direct 32-bit reference
    Abs32 = 2,
    /// R_ARM_THM_CALL (10) — Thumb BL/BLX instruction (Cortex-M). This is the
    /// correct relocation for a Thumb-2 `bl` call site; `Call`/R_ARM_CALL below
    /// is the ARM-mode form and is mis-resolved by `ld` for Thumb calls.
    ThmCall = 10,
    /// R_ARM_CALL (28) — BL/BLX instruction
    Call = 28,
    /// R_ARM_JUMP24 (29) — `B`/`BL<cond>` instruction
    Jump24 = 29,
    /// R_ARM_MOVW_ABS_NC (43) — MOVW instruction (low 16 bits)
    MovwAbsNc = 43,
    /// R_ARM_MOVT_ABS (44) — MOVT instruction (high 16 bits)
    MovtAbs = 44,
}

341/// ELF relocation entry (REL format, no addend)
342#[derive(Debug, Clone)]
343pub struct Relocation {
344    /// Offset within the section where the relocation applies
345    pub offset: u32,
346    /// Symbol index in the symbol table
347    pub symbol_index: u32,
348    /// Relocation type
349    pub reloc_type: ArmRelocationType,
350}
351
/// `e_flags` value marking ARM EABI version 5.
///
/// Neither float-ABI bit is set in this value; OR in
/// [`EF_ARM_ABI_FLOAT_HARD`] or [`EF_ARM_ABI_FLOAT_SOFT`] to state one.
pub const EF_ARM_EABI_VER5: u32 = 0x05000000;
/// ARM hard-float ABI flag
pub const EF_ARM_ABI_FLOAT_HARD: u32 = 0x00000400;
/// ARM soft-float ABI flag
pub const EF_ARM_ABI_FLOAT_SOFT: u32 = 0x00000200;

359/// ELF file builder
360pub struct ElfBuilder {
361    /// File class (32 or 64 bit)
362    class: ElfClass,
363    /// Data encoding
364    data: ElfData,
365    /// File type
366    elf_type: ElfType,
367    /// Machine architecture
368    machine: ElfMachine,
369    /// Entry point address
370    entry: u32,
371    /// ELF e_flags (EABI version + float ABI)
372    e_flags: u32,
373    /// Sections
374    sections: Vec<Section>,
375    /// Symbols
376    symbols: Vec<Symbol>,
377    /// Program headers (segments)
378    program_headers: Vec<ProgramHeader>,
379    /// Relocations for .text section
380    relocations: Vec<Relocation>,
381}
382
383impl ElfBuilder {
384    /// Create a new ELF builder for ARM32
385    pub fn new_arm32() -> Self {
386        Self {
387            class: ElfClass::Elf32,
388            data: ElfData::LittleEndian,
389            elf_type: ElfType::Exec,
390            machine: ElfMachine::Arm,
391            entry: 0,
392            e_flags: EF_ARM_EABI_VER5,
393            sections: Vec::new(),
394            symbols: Vec::new(),
395            program_headers: Vec::new(),
396            relocations: Vec::new(),
397        }
398    }
399
400    /// Set entry point
401    ///
402    /// For ARM (Thumb) targets, bit 0 is automatically set to indicate Thumb mode.
403    /// Cortex-M is Thumb-only, so function addresses in ELF must have bit 0 set.
404    pub fn with_entry(mut self, entry: u32) -> Self {
405        self.entry = if self.machine == ElfMachine::Arm {
406            entry | 1 // Set Thumb bit for ARM targets
407        } else {
408            entry
409        };
410        self
411    }
412
413    /// Set ELF e_flags (e.g. to add hard-float ABI)
414    pub fn set_flags(&mut self, flags: u32) {
415        self.e_flags = flags;
416    }
417
418    /// Set file type
419    pub fn with_type(mut self, elf_type: ElfType) -> Self {
420        self.elf_type = elf_type;
421        self
422    }
423
424    /// Add a section
425    pub fn add_section(&mut self, section: Section) {
426        self.sections.push(section);
427    }
428
429    /// Add a symbol
430    pub fn add_symbol(&mut self, symbol: Symbol) {
431        self.symbols.push(symbol);
432    }
433
434    /// Add a program header (segment)
435    pub fn add_program_header(&mut self, ph: ProgramHeader) {
436        self.program_headers.push(ph);
437    }
438
439    /// Add a relocation entry for the .text section
440    pub fn add_relocation(&mut self, reloc: Relocation) {
441        self.relocations.push(reloc);
442    }
443
444    /// Add an undefined external symbol (e.g., __meld_dispatch_import)
445    /// Returns the symbol index (1-based, accounting for null symbol)
446    pub fn add_undefined_symbol(&mut self, name: &str) -> u32 {
447        let index = self.symbols.len() as u32 + 1; // +1 for null symbol
448        self.symbols.push(Symbol {
449            name: name.to_string(),
450            value: 0,
451            size: 0,
452            binding: SymbolBinding::Global,
453            symbol_type: SymbolType::Func,
454            section: 0, // SHN_UNDEF
455        });
456        index
457    }
458
459    /// Build the ELF file to bytes
460    pub fn build(&self) -> Result<Vec<u8>> {
461        let mut output = Vec::new();
462
463        // ELF header size (52 bytes for ELF32)
464        let header_size = 52;
465        // Program header size (32 bytes for ELF32)
466        let ph_entry_size = 32;
467        let ph_count = self.program_headers.len();
468        let ph_table_size = ph_entry_size * ph_count;
469
470        // Reserve space for ELF header + program headers
471        output.resize(header_size + ph_table_size, 0);
472
473        // Build string table for section names
474        let (shstrtab_data, section_name_offsets) = self.build_section_string_table();
475
476        // Build symbol string table
477        let (strtab_data, symbol_name_offsets) = self.build_symbol_string_table();
478
479        // Calculate section offsets (after ELF header + program headers)
480        let mut current_offset = header_size + ph_table_size;
481
482        // Section 1: .shstrtab (section name string table)
483        let shstrtab_offset = current_offset;
484        current_offset += shstrtab_data.len();
485
486        // Section 2: .strtab (symbol name string table)
487        let strtab_offset = current_offset;
488        current_offset += strtab_data.len();
489
490        // User sections
491        let mut section_offsets = Vec::new();
492        for section in &self.sections {
493            section_offsets.push(current_offset);
494            current_offset += section.data.len();
495        }
496
497        // Section 3: .symtab (symbol table)
498        let symtab_offset = current_offset;
499        let symtab_data = self.build_symbol_table(&symbol_name_offsets);
500        current_offset += symtab_data.len();
501
502        // Section 4+ (optional): .rel.text (relocations)
503        let rel_data = self.build_relocation_table();
504        let rel_offset = current_offset;
505        current_offset += rel_data.len();
506
507        // Section header table comes at the end
508        let sh_offset = current_offset;
509
510        // Now write all the data
511        output.extend_from_slice(&shstrtab_data);
512        output.extend_from_slice(&strtab_data);
513
514        for section in &self.sections {
515            output.extend_from_slice(&section.data);
516        }
517
518        output.extend_from_slice(&symtab_data);
519        output.extend_from_slice(&rel_data);
520
521        // Write section headers
522        let section_headers = self.build_section_headers_with_rel(
523            &section_name_offsets,
524            shstrtab_offset,
525            &shstrtab_data,
526            strtab_offset,
527            &strtab_data,
528            symtab_offset,
529            &symtab_data,
530            &section_offsets,
531            rel_offset,
532            &rel_data,
533        );
534        output.extend_from_slice(&section_headers);
535
536        // Write program headers (right after ELF header)
537        // Auto-correct p_offset for LOAD segments by matching vaddr to section addrs
538        for (i, ph) in self.program_headers.iter().enumerate() {
539            let ph_offset = header_size + i * ph_entry_size;
540            let mut corrected_ph = ph.clone();
541            if corrected_ph.filesz > 0 {
542                // Find the section whose addr matches this segment's vaddr
543                for (si, section) in self.sections.iter().enumerate() {
544                    if section.addr == corrected_ph.vaddr && si < section_offsets.len() {
545                        corrected_ph.offset = section_offsets[si] as u32;
546                        break;
547                    }
548                }
549            }
550            self.write_program_header(
551                &mut output[ph_offset..ph_offset + ph_entry_size],
552                &corrected_ph,
553            );
554        }
555
556        // Now write the actual ELF header at the beginning
557        let has_rel = !self.relocations.is_empty();
558        let num_sections = 4 + self.sections.len() + if has_rel { 1 } else { 0 };
559        let ph_offset = if ph_count > 0 { header_size as u32 } else { 0 };
560        self.write_elf_header_with_phdrs(
561            &mut output[0..header_size],
562            ph_offset,
563            ph_count as u16,
564            sh_offset as u32,
565            num_sections as u16,
566        )?;
567
568        Ok(output)
569    }
570
571    /// Write a single program header
572    fn write_program_header(&self, output: &mut [u8], ph: &ProgramHeader) {
573        let mut cursor = 0;
574
575        // p_type (4 bytes)
576        output[cursor..cursor + 4].copy_from_slice(&(ph.p_type as u32).to_le_bytes());
577        cursor += 4;
578
579        // p_offset (4 bytes)
580        output[cursor..cursor + 4].copy_from_slice(&ph.offset.to_le_bytes());
581        cursor += 4;
582
583        // p_vaddr (4 bytes)
584        output[cursor..cursor + 4].copy_from_slice(&ph.vaddr.to_le_bytes());
585        cursor += 4;
586
587        // p_paddr (4 bytes)
588        output[cursor..cursor + 4].copy_from_slice(&ph.paddr.to_le_bytes());
589        cursor += 4;
590
591        // p_filesz (4 bytes)
592        output[cursor..cursor + 4].copy_from_slice(&ph.filesz.to_le_bytes());
593        cursor += 4;
594
595        // p_memsz (4 bytes)
596        output[cursor..cursor + 4].copy_from_slice(&ph.memsz.to_le_bytes());
597        cursor += 4;
598
599        // p_flags (4 bytes)
600        output[cursor..cursor + 4].copy_from_slice(&ph.flags.to_le_bytes());
601        cursor += 4;
602
603        // p_align (4 bytes)
604        output[cursor..cursor + 4].copy_from_slice(&ph.align.to_le_bytes());
605    }
606
607    /// Write ELF header with program header info
608    fn write_elf_header_with_phdrs(
609        &self,
610        output: &mut [u8],
611        ph_offset: u32,
612        ph_count: u16,
613        sh_offset: u32,
614        sh_count: u16,
615    ) -> Result<()> {
616        let mut cursor = 0;
617
618        // ELF magic number
619        output[cursor..cursor + 4].copy_from_slice(&[0x7f, b'E', b'L', b'F']);
620        cursor += 4;
621
622        // Class (32-bit)
623        output[cursor] = self.class as u8;
624        cursor += 1;
625
626        // Data (little-endian)
627        output[cursor] = self.data as u8;
628        cursor += 1;
629
630        // Version
631        output[cursor] = 1;
632        cursor += 1;
633
634        // OS/ABI
635        output[cursor] = 0; // System V
636        cursor += 1;
637
638        // ABI version
639        output[cursor] = 0;
640        cursor += 1;
641
642        // Padding (7 bytes)
643        output[cursor..cursor + 7].copy_from_slice(&[0; 7]);
644        cursor += 7;
645
646        // Type (little-endian u16)
647        let etype = self.elf_type as u16;
648        output[cursor..cursor + 2].copy_from_slice(&etype.to_le_bytes());
649        cursor += 2;
650
651        // Machine (little-endian u16)
652        let machine = self.machine as u16;
653        output[cursor..cursor + 2].copy_from_slice(&machine.to_le_bytes());
654        cursor += 2;
655
656        // Version (little-endian u32)
657        output[cursor..cursor + 4].copy_from_slice(&1u32.to_le_bytes());
658        cursor += 4;
659
660        // Entry point (little-endian u32)
661        output[cursor..cursor + 4].copy_from_slice(&self.entry.to_le_bytes());
662        cursor += 4;
663
664        // Program header offset (little-endian u32)
665        output[cursor..cursor + 4].copy_from_slice(&ph_offset.to_le_bytes());
666        cursor += 4;
667
668        // Section header offset (little-endian u32)
669        output[cursor..cursor + 4].copy_from_slice(&sh_offset.to_le_bytes());
670        cursor += 4;
671
672        // Flags (little-endian u32) - ARM EABI version 5 + float ABI
673        output[cursor..cursor + 4].copy_from_slice(&self.e_flags.to_le_bytes());
674        cursor += 4;
675
676        // ELF header size (little-endian u16)
677        output[cursor..cursor + 2].copy_from_slice(&52u16.to_le_bytes());
678        cursor += 2;
679
680        // Program header entry size (little-endian u16)
681        let ph_entry_size: u16 = if ph_count > 0 { 32 } else { 0 };
682        output[cursor..cursor + 2].copy_from_slice(&ph_entry_size.to_le_bytes());
683        cursor += 2;
684
685        // Program header count (little-endian u16)
686        output[cursor..cursor + 2].copy_from_slice(&ph_count.to_le_bytes());
687        cursor += 2;
688
689        // Section header entry size (little-endian u16)
690        output[cursor..cursor + 2].copy_from_slice(&40u16.to_le_bytes());
691        cursor += 2;
692
693        // Section header count (little-endian u16)
694        output[cursor..cursor + 2].copy_from_slice(&sh_count.to_le_bytes());
695        cursor += 2;
696
697        // Section header string table index (little-endian u16) - .shstrtab is section 1
698        output[cursor..cursor + 2].copy_from_slice(&1u16.to_le_bytes());
699
700        Ok(())
701    }
702
703    /// Build section name string table
704    fn build_section_string_table(&self) -> (Vec<u8>, Vec<usize>) {
705        let mut strtab = vec![0]; // null string at offset 0
706        let mut offsets = Vec::new();
707
708        // Standard sections
709        strtab.extend_from_slice(b".shstrtab\0");
710        strtab.extend_from_slice(b".strtab\0");
711        strtab.extend_from_slice(b".symtab\0");
712
713        // User sections
714        for section in &self.sections {
715            let offset = strtab.len();
716            offsets.push(offset);
717            strtab.extend_from_slice(section.name.as_bytes());
718            strtab.push(0);
719        }
720
721        // .rel.text (if relocations exist)
722        if !self.relocations.is_empty() {
723            strtab.extend_from_slice(b".rel.text\0");
724        }
725
726        (strtab, offsets)
727    }
728
729    /// Build symbol name string table
730    fn build_symbol_string_table(&self) -> (Vec<u8>, Vec<usize>) {
731        let mut strtab = vec![0]; // null string at offset 0
732        let mut offsets = Vec::new();
733
734        for symbol in &self.symbols {
735            let offset = strtab.len();
736            offsets.push(offset);
737            strtab.extend_from_slice(symbol.name.as_bytes());
738            strtab.push(0);
739        }
740
741        (strtab, offsets)
742    }
743
744    /// Build relocation table (ELF32 REL entries: 8 bytes each)
745    fn build_relocation_table(&self) -> Vec<u8> {
746        if self.relocations.is_empty() {
747            return Vec::new();
748        }
749
750        let mut rel_data = Vec::new();
751        for reloc in &self.relocations {
752            // r_offset (4 bytes)
753            rel_data.extend_from_slice(&reloc.offset.to_le_bytes());
754            // r_info (4 bytes) = (sym_index << 8) | type
755            let r_info = (reloc.symbol_index << 8) | (reloc.reloc_type as u32);
756            rel_data.extend_from_slice(&r_info.to_le_bytes());
757        }
758        rel_data
759    }
760
761    /// Build symbol table
762    fn build_symbol_table(&self, name_offsets: &[usize]) -> Vec<u8> {
763        let mut symtab = Vec::new();
764
765        // First entry is always null symbol
766        symtab.extend_from_slice(&[0u8; 16]); // 16 bytes per symbol in ELF32
767
768        // User symbols
769        for (i, symbol) in self.symbols.iter().enumerate() {
770            let name_offset = if i < name_offsets.len() {
771                name_offsets[i] as u32
772            } else {
773                0
774            };
775
776            // st_name (4 bytes)
777            symtab.extend_from_slice(&name_offset.to_le_bytes());
778
779            // st_value (4 bytes)
780            // For ARM targets, STT_FUNC symbols must have bit 0 set (Thumb interworking)
781            let value = if self.machine == ElfMachine::Arm && symbol.symbol_type == SymbolType::Func
782            {
783                symbol.value | 1
784            } else {
785                symbol.value
786            };
787            symtab.extend_from_slice(&value.to_le_bytes());
788
789            // st_size (4 bytes)
790            symtab.extend_from_slice(&symbol.size.to_le_bytes());
791
792            // st_info (1 byte) = (binding << 4) | (type & 0xf)
793            let info = ((symbol.binding as u8) << 4) | (symbol.symbol_type as u8 & 0xf);
794            symtab.push(info);
795
796            // st_other (1 byte)
797            symtab.push(0);
798
799            // st_shndx (2 bytes)
800            symtab.extend_from_slice(&symbol.section.to_le_bytes());
801        }
802
803        symtab
804    }
805
806    /// Build section headers (with optional .rel.text)
807    #[allow(clippy::too_many_arguments)]
808    fn build_section_headers_with_rel(
809        &self,
810        section_name_offsets: &[usize],
811        shstrtab_offset: usize,
812        shstrtab_data: &[u8],
813        strtab_offset: usize,
814        strtab_data: &[u8],
815        symtab_offset: usize,
816        symtab_data: &[u8],
817        section_offsets: &[usize],
818        rel_offset: usize,
819        rel_data: &[u8],
820    ) -> Vec<u8> {
821        let mut headers = Vec::new();
822
823        // Section header size is 40 bytes for ELF32
824
825        // Section 0: null section
826        headers.extend_from_slice(&[0u8; 40]);
827
828        // Section 1: .shstrtab
829        self.write_section_header(
830            &mut headers,
831            1,
832            SectionType::StrTab as u32,
833            0,
834            0,
835            shstrtab_offset as u32,
836            shstrtab_data.len() as u32,
837            0,
838            0,
839            1,
840            0,
841        );
842
843        // Section 2: .strtab
844        let strtab_name_offset = ".shstrtab\0".len();
845        self.write_section_header(
846            &mut headers,
847            strtab_name_offset as u32,
848            SectionType::StrTab as u32,
849            0,
850            0,
851            strtab_offset as u32,
852            strtab_data.len() as u32,
853            0,
854            0,
855            1,
856            0,
857        );
858
859        // Section 3: .symtab (links to .strtab which is section 2)
860        let symtab_name_offset = ".shstrtab\0.strtab\0".len();
861        self.write_section_header(
862            &mut headers,
863            symtab_name_offset as u32,
864            SectionType::SymTab as u32,
865            0,
866            0,
867            symtab_offset as u32,
868            symtab_data.len() as u32,
869            2,
870            1,
871            4,
872            16,
873        );
874
875        // User sections
876        for (i, section) in self.sections.iter().enumerate() {
877            let name_offset = if i < section_name_offsets.len() {
878                section_name_offsets[i] as u32
879            } else {
880                0
881            };
882            let offset = if i < section_offsets.len() {
883                section_offsets[i] as u32
884            } else {
885                0
886            };
887
888            self.write_section_header(
889                &mut headers,
890                name_offset,
891                section.section_type as u32,
892                section.flags,
893                section.addr,
894                offset,
895                section.size(),
896                0,
897                0,
898                section.align,
899                0,
900            );
901        }
902
903        // .rel.text section (if relocations exist)
904        if !rel_data.is_empty() {
905            let rel_name_offset = self.rel_text_shstrtab_offset();
906            // sh_link = symtab section index (3), sh_info = .text section index (4, first user section)
907            let text_section_idx = 4u32; // null(0) + shstrtab(1) + strtab(2) + symtab(3) + .text(4)
908            self.write_section_header(
909                &mut headers,
910                rel_name_offset as u32,
911                SectionType::Rel as u32,
912                0,
913                0,
914                rel_offset as u32,
915                rel_data.len() as u32,
916                3,                // sh_link = .symtab section index
917                text_section_idx, // sh_info = section to which relocations apply
918                4,
919                8, // Each REL entry is 8 bytes
920            );
921        }
922
923        headers
924    }
925
926    /// Compute the shstrtab offset where .rel.text name begins
927    fn rel_text_shstrtab_offset(&self) -> usize {
928        // Layout: \0 .shstrtab\0 .strtab\0 .symtab\0 [user sections...] .rel.text\0
929        let mut offset = 1 + ".shstrtab\0".len() + ".strtab\0".len() + ".symtab\0".len();
930        for section in &self.sections {
931            offset += section.name.len() + 1;
932        }
933        offset
934    }
935
936    /// Write a single section header
937    #[allow(clippy::too_many_arguments)]
938    fn write_section_header(
939        &self,
940        output: &mut Vec<u8>,
941        name: u32,
942        sh_type: u32,
943        flags: u32,
944        addr: u32,
945        offset: u32,
946        size: u32,
947        link: u32,
948        info: u32,
949        align: u32,
950        entsize: u32,
951    ) {
952        output.extend_from_slice(&name.to_le_bytes());
953        output.extend_from_slice(&sh_type.to_le_bytes());
954        output.extend_from_slice(&flags.to_le_bytes());
955        output.extend_from_slice(&addr.to_le_bytes());
956        output.extend_from_slice(&offset.to_le_bytes());
957        output.extend_from_slice(&size.to_le_bytes());
958        output.extend_from_slice(&link.to_le_bytes());
959        output.extend_from_slice(&info.to_le_bytes());
960        output.extend_from_slice(&align.to_le_bytes());
961        output.extend_from_slice(&entsize.to_le_bytes());
962    }
963
964    /// Write ELF header (legacy method for tests)
965    #[allow(dead_code)]
966    fn write_elf_header(&self, output: &mut Vec<u8>) -> Result<()> {
967        // ELF magic number
968        output.extend_from_slice(&[0x7f, b'E', b'L', b'F']);
969
970        // Class (32-bit)
971        output.push(self.class as u8);
972
973        // Data (little-endian)
974        output.push(self.data as u8);
975
976        // Version
977        output.push(1);
978
979        // OS/ABI
980        output.push(0); // System V
981
982        // ABI version
983        output.push(0);
984
985        // Padding
986        output.extend_from_slice(&[0; 7]);
987
988        // Type (little-endian u16)
989        let etype = self.elf_type as u16;
990        output.extend_from_slice(&etype.to_le_bytes());
991
992        // Machine (little-endian u16)
993        let machine = self.machine as u16;
994        output.extend_from_slice(&machine.to_le_bytes());
995
996        // Version (little-endian u32)
997        output.extend_from_slice(&1u32.to_le_bytes());
998
999        // Entry point (little-endian u32)
1000        output.extend_from_slice(&self.entry.to_le_bytes());
1001
1002        // Program header offset (little-endian u32)
1003        output.extend_from_slice(&0u32.to_le_bytes());
1004
1005        // Section header offset (little-endian u32)
1006        output.extend_from_slice(&0u32.to_le_bytes());
1007
1008        // Flags (little-endian u32)
1009        output.extend_from_slice(&0u32.to_le_bytes());
1010
1011        // ELF header size (little-endian u16)
1012        output.extend_from_slice(&52u16.to_le_bytes());
1013
1014        // Program header entry size (little-endian u16)
1015        output.extend_from_slice(&0u16.to_le_bytes());
1016
1017        // Program header count (little-endian u16)
1018        output.extend_from_slice(&0u16.to_le_bytes());
1019
1020        // Section header entry size (little-endian u16)
1021        output.extend_from_slice(&40u16.to_le_bytes());
1022
1023        // Section header count (little-endian u16)
1024        output.extend_from_slice(&0u16.to_le_bytes());
1025
1026        // Section header string table index (little-endian u16)
1027        output.extend_from_slice(&0u16.to_le_bytes());
1028
1029        Ok(())
1030    }
1031}
1032
1033#[cfg(test)]
1034mod tests {
1035    use super::*;
1036
#[test]
fn test_elf_builder_creation() {
    // The ARM32 preset must describe a 32-bit, little-endian ARM target.
    let arm32 = ElfBuilder::new_arm32();

    assert_eq!(
        (arm32.class, arm32.data, arm32.machine),
        (ElfClass::Elf32, ElfData::LittleEndian, ElfMachine::Arm)
    );
}
1044
#[test]
fn test_section_creation() {
    // An allocated, executable code section at 0x8000 with 4-byte alignment.
    let code_flags = SectionFlags::ALLOC | SectionFlags::EXEC;
    let text = Section::new(".text", SectionType::ProgBits)
        .with_flags(code_flags)
        .with_addr(0x8000)
        .with_align(4);

    // Each builder-style setter should be visible on the finished section.
    assert_eq!(text.section_type, SectionType::ProgBits);
    assert_eq!(text.name, ".text");
    assert_eq!(text.align, 4);
    assert_eq!(text.addr, 0x8000);
}
1057
#[test]
fn test_symbol_creation() {
    // A global function symbol built through the fluent setters.
    let main_fn = Symbol::new("main")
        .with_value(0x8000)
        .with_size(128)
        .with_binding(SymbolBinding::Global)
        .with_type(SymbolType::Func)
        .with_section(1);

    // Classification first, then the plain data fields.
    assert_eq!(
        (&main_fn.binding, &main_fn.symbol_type),
        (&SymbolBinding::Global, &SymbolType::Func)
    );
    assert_eq!(main_fn.name, "main");
    assert_eq!(main_fn.size, 128);
    assert_eq!(main_fn.value, 0x8000);
}
1073
#[test]
fn test_elf_header_generation() {
    let image = ElfBuilder::new_arm32().with_entry(0x8000).build().unwrap();

    // The file must open with the ELF magic number.
    assert_eq!(&image[0..4], &[0x7f, b'E', b'L', b'F']);

    // The next three identification bytes are all expected to be 1:
    // class (32-bit), data encoding (little-endian) and version.
    for (offset, field) in [(4, "class"), (5, "data"), (6, "version")] {
        assert_eq!(image[offset], 1, "unexpected ident byte for {}", field);
    }
}
1091
#[test]
fn test_add_sections() {
    let mut builder = ElfBuilder::new_arm32();

    // Executable code section.
    builder.add_section(
        Section::new(".text", SectionType::ProgBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::EXEC),
    );

    // Writable data section.
    builder.add_section(
        Section::new(".data", SectionType::ProgBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::WRITE),
    );

    // Both user sections must have been recorded on the builder.
    assert_eq!(builder.sections.len(), 2);
}
1107
#[test]
fn test_add_symbols() {
    let mut builder = ElfBuilder::new_arm32();

    // One global function and one local object, registered in that order.
    let pending = [
        Symbol::new("main")
            .with_binding(SymbolBinding::Global)
            .with_type(SymbolType::Func),
        Symbol::new("data")
            .with_binding(SymbolBinding::Local)
            .with_type(SymbolType::Object),
    ];
    for symbol in pending {
        builder.add_symbol(symbol);
    }

    assert_eq!(builder.symbols.len(), 2);
}
1125
#[test]
fn test_complete_elf_generation() {
    // End-to-end check: an executable image with three user sections and
    // two global symbols, validated through its file header.
    let mut builder = ElfBuilder::new_arm32()
        .with_entry(0x8000)
        .with_type(ElfType::Exec);

    // .text holding a few ARM instructions.
    builder.add_section(
        Section::new(".text", SectionType::ProgBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::EXEC)
            .with_addr(0x8000)
            .with_align(4)
            .with_data(vec![
                0x00, 0x48, 0x2d, 0xe9, // push {fp, lr}
                0x04, 0xb0, 0x8d, 0xe2, // add fp, sp, #4
                0x00, 0x00, 0xa0, 0xe3, // mov r0, #0
                0x00, 0x88, 0xbd, 0xe8, // pop {fp, pc}
            ]),
    );

    // .data holding four initialised bytes.
    builder.add_section(
        Section::new(".data", SectionType::ProgBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::WRITE)
            .with_addr(0x8100)
            .with_align(4)
            .with_data(vec![0x01, 0x02, 0x03, 0x04]),
    );

    // .bss carries no file data.
    builder.add_section(
        Section::new(".bss", SectionType::NoBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::WRITE)
            .with_addr(0x8200)
            .with_align(4),
    );

    // Symbols. Section indices count the generated sections first:
    // 0=null, 1=shstrtab, 2=strtab, 3=symtab, 4=.text, 5=.data.
    builder.add_symbol(
        Symbol::new("main")
            .with_value(0x8000)
            .with_size(16)
            .with_binding(SymbolBinding::Global)
            .with_type(SymbolType::Func)
            .with_section(4),
    );
    builder.add_symbol(
        Symbol::new("global_var")
            .with_value(0x8100)
            .with_size(4)
            .with_binding(SymbolBinding::Global)
            .with_type(SymbolType::Object)
            .with_section(5),
    );

    let elf = builder.build().unwrap();

    // Little-endian readers for fixed offsets inside the file header.
    let u16_at = |at: usize| u16::from_le_bytes([elf[at], elf[at + 1]]);
    let u32_at =
        |at: usize| u32::from_le_bytes([elf[at], elf[at + 1], elf[at + 2], elf[at + 3]]);

    // Identification bytes: magic, 32-bit class, little-endian, version 1.
    assert_eq!(&elf[0..4], &[0x7f, b'E', b'L', b'F']);
    assert_eq!(elf[4], 1);
    assert_eq!(elf[5], 1);
    assert_eq!(elf[6], 1);

    // Bigger than the 52-byte header alone, but still a sane size.
    assert!((53..10000).contains(&elf.len()));

    // Entry point at offset 24: 0x8000 with the Thumb bit set.
    assert_eq!(u32_at(24), 0x8001);

    // Section header table offset at offset 32 must have been filled in.
    assert!(u32_at(32) > 0);

    // Section count at offset 48:
    // null + shstrtab + strtab + symtab + .text + .data + .bss = 7.
    assert_eq!(u16_at(48), 7);

    // Section-name string-table index at offset 50 points at .shstrtab (1).
    assert_eq!(u16_at(50), 1);
}
1228
#[test]
fn test_string_table_generation() {
    let mut builder = ElfBuilder::new_arm32();
    for user_section in [".text", ".data"] {
        builder.add_section(Section::new(user_section, SectionType::ProgBits));
    }

    let (table, user_offsets) = builder.build_section_string_table();

    // The table must begin with the mandatory NUL byte.
    assert_eq!(table[0], 0);

    // The built-in section names and both user section names must be present.
    let rendered = String::from_utf8_lossy(&table);
    for expected in [".shstrtab", ".strtab", ".symtab", ".text", ".data"] {
        assert!(rendered.contains(expected));
    }

    // Exactly one offset is reported per user section.
    assert_eq!(user_offsets.len(), 2);
}
1252
#[test]
fn test_relocation_support() {
    let mut builder = ElfBuilder::new_arm32()
        .with_entry(0x8000)
        .with_type(ElfType::Rel);

    // .text filled with 16 placeholder bytes (room for four instructions).
    builder.add_section(
        Section::new(".text", SectionType::ProgBits)
            .with_flags(SectionFlags::ALLOC | SectionFlags::EXEC)
            .with_addr(0x8000)
            .with_align(4)
            .with_data(vec![0x00u8; 16]),
    );

    // Register the external symbol; it must receive a non-zero index.
    let import_index = builder.add_undefined_symbol("__meld_dispatch_import");
    assert!(import_index > 0);

    // Call relocation against that symbol for the BL at offset 4.
    builder.add_relocation(Relocation {
        offset: 4,
        symbol_index: import_index,
        reloc_type: ArmRelocationType::Call,
    });

    let image = builder.build().unwrap();

    // The output must still open with the ELF magic.
    assert_eq!(&image[0..4], &[0x7f, b'E', b'L', b'F']);

    // Section count (u16 at offset 48) should include .rel.text:
    // null + shstrtab + strtab + symtab + .text + .rel.text = 6
    let section_count = u16::from_le_bytes([image[48], image[49]]);
    assert_eq!(section_count, 6);

    // The undefined symbol's name must appear somewhere in the image.
    let needle = b"__meld_dispatch_import";
    let name_present = image
        .windows(needle.len())
        .any(|window| window == needle);
    assert!(
        name_present,
        "ELF should contain __meld_dispatch_import symbol name"
    );
}
1299
#[test]
fn test_symbol_table_encoding() {
    let mut builder = ElfBuilder::new_arm32();
    builder.add_symbol(
        Symbol::new("test_func")
            .with_value(0x1000)
            .with_size(64)
            .with_binding(SymbolBinding::Global)
            .with_type(SymbolType::Func)
            .with_section(1),
    );

    let (_names, name_offsets) = builder.build_symbol_string_table();
    let symtab = builder.build_symbol_table(&name_offsets);

    // Null symbol (16 bytes) + one real symbol (16 bytes) = 32 bytes.
    assert_eq!(symtab.len(), 32);

    // The leading null entry is entirely zero.
    assert!(symtab[..16].iter().all(|byte| *byte == 0));

    // Everything below is decoded relative to the start of the second entry.
    let entry = &symtab[16..32];
    let word_at =
        |at: usize| u32::from_le_bytes([entry[at], entry[at + 1], entry[at + 2], entry[at + 3]]);

    // st_value (entry bytes 4-7): function symbols get bit 0 set for
    // Thumb interworking, so 0x1000 is stored as 0x1001.
    assert_eq!(word_at(4), 0x1001);

    // st_size (entry bytes 8-11).
    assert_eq!(word_at(8), 64);

    // st_info (entry byte 12): binding in the high nibble, type in the low.
    let info = entry[12];
    assert_eq!(info >> 4, SymbolBinding::Global as u8);
    assert_eq!(info & 0xf, SymbolType::Func as u8);
}
1346}