// ud_format/elf.rs

//! ELF reader and writer with byte-identical round-trip.
//!
//! Handles both ELFCLASS32 and ELFCLASS64 little-endian images. The
//! parsed representation always uses 64-bit-shaped headers (`Ehdr64`,
//! `Phdr64`, `Shdr64`) regardless of input class — 32-bit fields are
//! zero-extended on parse and truncated on write. The on-disk format
//! is recorded in [`Elf64File::class`] and used to dispatch the right
//! header layout when serialising.
//!
//! The contract: for any supported input `bytes`,
//! `Elf64File::parse(bytes)?.write_to_vec() == bytes`.
//!
//! Anything not in scope for this crate is preserved as opaque bytes and
//! re-emitted verbatim. Section *contents* (bytes inside `.text`, `.rodata`,
//! `.symtab`, etc.) are never interpreted here — that belongs to the arch
//! backends and analysis crates.
17
18#![allow(clippy::cast_possible_truncation)]
19
20use std::ops::Range;
21
/// Number of bytes in the `e_ident` array that opens every ELF image.
const EI_NIDENT: usize = 16;

/// The four magic bytes (`\x7fELF`) found at the very start of `e_ident`.
pub const ELF_MAGIC: [u8; 4] = *b"\x7fELF";

/// Crate-internal alias kept for the original private name; many
/// internal call sites still spell the magic as `ELFMAG`.
pub(crate) const ELFMAG: [u8; 4] = ELF_MAGIC;

/// Value of `e_ident[EI_CLASS]` marking a 32-bit object.
const ELFCLASS32: u8 = 1;

/// Value of `e_ident[EI_CLASS]` marking a 64-bit object.
const ELFCLASS64: u8 = 2;

/// Value of `e_ident[EI_DATA]` marking 2's complement little-endian data.
const ELFDATA2LSB: u8 = 1;
40
/// Header width of the on-disk image.
///
/// Captured while parsing and consulted again when writing, so the
/// serialised headers use exactly the byte layout the input had.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElfClass {
    /// The image uses 32-bit headers.
    Elf32,
    /// The image uses 64-bit headers.
    Elf64,
}
49
/// `sh_type` of a fully-linked symbol table.
pub const SHT_SYMTAB: u32 = 2;

/// `sh_type` of a string table.
pub const SHT_STRTAB: u32 = 3;

/// `sh_type` of a relocation table whose entries carry explicit addends
/// (`Elf64_Rela`).
pub const SHT_RELA: u32 = 4;

/// `sh_type` of a section that takes up no space in the file (e.g. `.bss`).
const SHT_NOBITS: u32 = 8;

/// `sh_type` of a relocation table without addends — `Elf64_Rel` entries,
/// 16 bytes each: `r_offset:8 ; r_info:8`. Used by BPF (LLVM emits
/// `SHT_REL`, not `SHT_RELA`, for BPF objects).
pub const SHT_REL: u32 = 9;

/// `sh_type` of the dynamic-linking symbol table (always present in dynamic
/// executables and shared objects).
pub const SHT_DYNSYM: u32 = 11;

/// `sh_flags` bit set on sections that contain executable instructions.
pub const SHF_EXECINSTR: u64 = 0x4;

/// `e_machine` for i386 (32-bit x86).
pub const EM_386: u16 = 3;

/// `e_machine` for x86-64.
pub const EM_X86_64: u16 = 62;

/// `e_machine` for `AArch64`.
pub const EM_AARCH64: u16 = 183;

/// `e_machine` for Linux eBPF.
pub const EM_BPF: u16 = 247;

/// `e_machine` for Solana SBF (classic sBPFv1 / sBPFv2).
/// Not assigned in the GABI registry, but used by the Solana toolchain
/// and the Agave loader for on-chain programs.
pub const EM_SBF: u16 = 263;

/// BPF relocation types (LLVM `lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp`).
///
/// Only [`R_BPF_64_32`] — the `call <imm>` form — matters for syscall-name
/// resolution. The other types apply to data references and
/// `lddw r, imm64` slots; they are named here so they can be recognised,
/// but layer 1 does not need to act on them.
pub const R_BPF_NONE: u32 = 0;
/// BPF relocation type 1; see [`R_BPF_NONE`] for the group overview.
pub const R_BPF_64_64: u32 = 1;
/// BPF relocation type 2; see [`R_BPF_NONE`] for the group overview.
pub const R_BPF_64_ABS64: u32 = 2;
/// BPF relocation type 3; see [`R_BPF_NONE`] for the group overview.
pub const R_BPF_64_ABS32: u32 = 3;
/// BPF relocation type 4; see [`R_BPF_NONE`] for the group overview.
pub const R_BPF_64_NODYLD32: u32 = 4;
/// Solana-specific dynamic relocation: an absolute 64-bit pointer that is
/// adjusted at load time by the program's load address. Used by
/// BPFLoaderUpgradeable for data refs.
pub const R_BPF_64_RELATIVE: u32 = 8;
/// BPF relocation type 10 — the `call <imm>` form used for syscall-name
/// resolution.
pub const R_BPF_64_32: u32 = 10;

/// Bytes occupied on disk by an ELF64 ELF header.
const EHDR64_SIZE: u16 = 64;

/// Bytes occupied on disk by one ELF64 program header entry.
const PHDR64_SIZE: u16 = 56;

/// Bytes occupied on disk by one ELF64 section header entry.
const SHDR64_SIZE: u16 = 64;

/// Bytes occupied on disk by an ELF32 ELF header.
const EHDR32_SIZE: u16 = 52;

/// Bytes occupied on disk by one ELF32 program header entry.
const PHDR32_SIZE: u16 = 32;

/// Bytes occupied on disk by one ELF32 section header entry.
const SHDR32_SIZE: u16 = 40;
124
125const fn ehdr_size(class: ElfClass) -> u16 {
126    match class {
127        ElfClass::Elf32 => EHDR32_SIZE,
128        ElfClass::Elf64 => EHDR64_SIZE,
129    }
130}
131
132const fn phdr_size(class: ElfClass) -> u16 {
133    match class {
134        ElfClass::Elf32 => PHDR32_SIZE,
135        ElfClass::Elf64 => PHDR64_SIZE,
136    }
137}
138
139const fn shdr_size(class: ElfClass) -> u16 {
140    match class {
141        ElfClass::Elf32 => SHDR32_SIZE,
142        ElfClass::Elf64 => SHDR64_SIZE,
143    }
144}
145
146/// Errors surfaced when parsing or writing an ELF64 file.
147#[derive(Debug, thiserror::Error)]
148pub enum Error {
149    #[error("file too short: needed {needed} bytes at offset {offset}, have {have}")]
150    Truncated { offset: u64, needed: u64, have: u64 },
151
152    #[error("not an ELF file: bad magic {0:02x?}")]
153    BadMagic([u8; 4]),
154
155    #[error("unsupported ELF class: {0} (only ELFCLASS32 = 1 and ELFCLASS64 = 2 are implemented)")]
156    UnsupportedClass(u8),
157
158    #[error("unsupported ELF data encoding: {0} (only ELFDATA2LSB = 1 is implemented)")]
159    UnsupportedEncoding(u8),
160
161    #[error("unexpected e_ehsize: header says {got}, on-disk ELF64 size is {expected}")]
162    BadEhsize { got: u16, expected: u16 },
163
164    #[error("unexpected e_phentsize: header says {got}, on-disk ELF64 phdr size is {expected}")]
165    BadPhentsize { got: u16, expected: u16 },
166
167    #[error("unexpected e_shentsize: header says {got}, on-disk ELF64 shdr size is {expected}")]
168    BadShentsize { got: u16, expected: u16 },
169
170    #[error(
171        "structured regions overlap: {a_label} at {a_start}..{a_end} vs {b_label} at {b_start}..{b_end}"
172    )]
173    OverlappingRegions {
174        a_label: String,
175        a_start: u64,
176        a_end: u64,
177        b_label: String,
178        b_start: u64,
179        b_end: u64,
180    },
181
182    #[error("integer overflow computing region end for {label} at offset {offset} size {size}")]
183    RegionOverflow {
184        label: String,
185        offset: u64,
186        size: u64,
187    },
188}
189
190pub type Result<T, E = Error> = std::result::Result<T, E>;
191
192/// Parsed ELF64 ELF header.
193///
194/// Field names mirror the ELF spec verbatim. The struct is public so
195/// downstream crates can read these for analysis; mutation through public
196/// fields is *not* part of a stability contract yet — invariants like
197/// `e_ehsize == EHDR64_SIZE` are enforced only at parse time.
198#[derive(Debug, Clone, PartialEq, Eq)]
199pub struct Ehdr64 {
200    pub e_ident: [u8; EI_NIDENT],
201    pub e_type: u16,
202    pub e_machine: u16,
203    pub e_version: u32,
204    pub e_entry: u64,
205    pub e_phoff: u64,
206    pub e_shoff: u64,
207    pub e_flags: u32,
208    pub e_ehsize: u16,
209    pub e_phentsize: u16,
210    pub e_phnum: u16,
211    pub e_shentsize: u16,
212    pub e_shnum: u16,
213    pub e_shstrndx: u16,
214}
215
216impl Ehdr64 {
217    fn parse(bytes: &[u8]) -> Result<(Self, ElfClass)> {
218        if bytes.len() < EI_NIDENT {
219            return Err(Error::Truncated {
220                offset: 0,
221                needed: EI_NIDENT as u64,
222                have: bytes.len() as u64,
223            });
224        }
225        let mut e_ident = [0u8; EI_NIDENT];
226        e_ident.copy_from_slice(&bytes[..EI_NIDENT]);
227
228        if e_ident[0..4] != ELFMAG {
229            let mut bad = [0u8; 4];
230            bad.copy_from_slice(&e_ident[0..4]);
231            return Err(Error::BadMagic(bad));
232        }
233        if e_ident[5] != ELFDATA2LSB {
234            return Err(Error::UnsupportedEncoding(e_ident[5]));
235        }
236
237        let class = match e_ident[4] {
238            ELFCLASS32 => ElfClass::Elf32,
239            ELFCLASS64 => ElfClass::Elf64,
240            other => return Err(Error::UnsupportedClass(other)),
241        };
242
243        match class {
244            ElfClass::Elf32 => Self::parse_32(bytes, e_ident).map(|h| (h, class)),
245            ElfClass::Elf64 => Self::parse_64(bytes, e_ident).map(|h| (h, class)),
246        }
247    }
248
249    fn parse_64(bytes: &[u8], e_ident: [u8; EI_NIDENT]) -> Result<Self> {
250        ensure_len(bytes, 0, EHDR64_SIZE.into())?;
251        let e_type = read_u16(bytes, 16);
252        let e_machine = read_u16(bytes, 18);
253        let e_version = read_u32(bytes, 20);
254        let e_entry = read_u64(bytes, 24);
255        let e_phoff = read_u64(bytes, 32);
256        let e_shoff = read_u64(bytes, 40);
257        let e_flags = read_u32(bytes, 48);
258        let e_ehsize = read_u16(bytes, 52);
259        let e_phentsize = read_u16(bytes, 54);
260        let e_phnum = read_u16(bytes, 56);
261        let e_shentsize = read_u16(bytes, 58);
262        let e_shnum = read_u16(bytes, 60);
263        let e_shstrndx = read_u16(bytes, 62);
264
265        if e_ehsize != EHDR64_SIZE {
266            return Err(Error::BadEhsize {
267                got: e_ehsize,
268                expected: EHDR64_SIZE,
269            });
270        }
271        if e_phnum > 0 && e_phentsize != PHDR64_SIZE {
272            return Err(Error::BadPhentsize {
273                got: e_phentsize,
274                expected: PHDR64_SIZE,
275            });
276        }
277        if e_shnum > 0 && e_shentsize != SHDR64_SIZE {
278            return Err(Error::BadShentsize {
279                got: e_shentsize,
280                expected: SHDR64_SIZE,
281            });
282        }
283
284        Ok(Self {
285            e_ident,
286            e_type,
287            e_machine,
288            e_version,
289            e_entry,
290            e_phoff,
291            e_shoff,
292            e_flags,
293            e_ehsize,
294            e_phentsize,
295            e_phnum,
296            e_shentsize,
297            e_shnum,
298            e_shstrndx,
299        })
300    }
301
302    fn parse_32(bytes: &[u8], e_ident: [u8; EI_NIDENT]) -> Result<Self> {
303        ensure_len(bytes, 0, EHDR32_SIZE.into())?;
304        let e_type = read_u16(bytes, 16);
305        let e_machine = read_u16(bytes, 18);
306        let e_version = read_u32(bytes, 20);
307        let e_entry = u64::from(read_u32(bytes, 24));
308        let e_phoff = u64::from(read_u32(bytes, 28));
309        let e_shoff = u64::from(read_u32(bytes, 32));
310        let e_flags = read_u32(bytes, 36);
311        let e_ehsize = read_u16(bytes, 40);
312        let e_phentsize = read_u16(bytes, 42);
313        let e_phnum = read_u16(bytes, 44);
314        let e_shentsize = read_u16(bytes, 46);
315        let e_shnum = read_u16(bytes, 48);
316        let e_shstrndx = read_u16(bytes, 50);
317
318        if e_ehsize != EHDR32_SIZE {
319            return Err(Error::BadEhsize {
320                got: e_ehsize,
321                expected: EHDR32_SIZE,
322            });
323        }
324        if e_phnum > 0 && e_phentsize != PHDR32_SIZE {
325            return Err(Error::BadPhentsize {
326                got: e_phentsize,
327                expected: PHDR32_SIZE,
328            });
329        }
330        if e_shnum > 0 && e_shentsize != SHDR32_SIZE {
331            return Err(Error::BadShentsize {
332                got: e_shentsize,
333                expected: SHDR32_SIZE,
334            });
335        }
336
337        Ok(Self {
338            e_ident,
339            e_type,
340            e_machine,
341            e_version,
342            e_entry,
343            e_phoff,
344            e_shoff,
345            e_flags,
346            e_ehsize,
347            e_phentsize,
348            e_phnum,
349            e_shentsize,
350            e_shnum,
351            e_shstrndx,
352        })
353    }
354
355    fn write(&self, class: ElfClass, out: &mut [u8]) {
356        match class {
357            ElfClass::Elf64 => self.write_64(out),
358            ElfClass::Elf32 => self.write_32(out),
359        }
360    }
361
362    fn write_64(&self, out: &mut [u8]) {
363        debug_assert!(out.len() >= EHDR64_SIZE as usize);
364        out[..EI_NIDENT].copy_from_slice(&self.e_ident);
365        write_u16(out, 16, self.e_type);
366        write_u16(out, 18, self.e_machine);
367        write_u32(out, 20, self.e_version);
368        write_u64(out, 24, self.e_entry);
369        write_u64(out, 32, self.e_phoff);
370        write_u64(out, 40, self.e_shoff);
371        write_u32(out, 48, self.e_flags);
372        write_u16(out, 52, self.e_ehsize);
373        write_u16(out, 54, self.e_phentsize);
374        write_u16(out, 56, self.e_phnum);
375        write_u16(out, 58, self.e_shentsize);
376        write_u16(out, 60, self.e_shnum);
377        write_u16(out, 62, self.e_shstrndx);
378    }
379
380    fn write_32(&self, out: &mut [u8]) {
381        debug_assert!(out.len() >= EHDR32_SIZE as usize);
382        out[..EI_NIDENT].copy_from_slice(&self.e_ident);
383        write_u16(out, 16, self.e_type);
384        write_u16(out, 18, self.e_machine);
385        write_u32(out, 20, self.e_version);
386        write_u32(out, 24, self.e_entry as u32);
387        write_u32(out, 28, self.e_phoff as u32);
388        write_u32(out, 32, self.e_shoff as u32);
389        write_u32(out, 36, self.e_flags);
390        write_u16(out, 40, self.e_ehsize);
391        write_u16(out, 42, self.e_phentsize);
392        write_u16(out, 44, self.e_phnum);
393        write_u16(out, 46, self.e_shentsize);
394        write_u16(out, 48, self.e_shnum);
395        write_u16(out, 50, self.e_shstrndx);
396    }
397}
398
/// Parsed program header entry, stored in 64-bit shape for both classes.
///
/// Field names mirror the ELF spec. For ELFCLASS32 inputs every 32-bit
/// address, offset and size is zero-extended on parse.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Phdr64 {
    /// Segment type.
    pub p_type: u32,
    /// Segment flags.
    pub p_flags: u32,
    /// File offset of the segment.
    pub p_offset: u64,
    /// Virtual address of the segment.
    pub p_vaddr: u64,
    /// Physical address of the segment.
    pub p_paddr: u64,
    /// Number of bytes the segment occupies in the file.
    pub p_filesz: u64,
    /// Number of bytes the segment occupies in memory.
    pub p_memsz: u64,
    /// Required alignment of the segment.
    pub p_align: u64,
}
411
412impl Phdr64 {
413    fn parse(class: ElfClass, bytes: &[u8]) -> Self {
414        match class {
415            ElfClass::Elf64 => Self::parse_64(bytes),
416            ElfClass::Elf32 => Self::parse_32(bytes),
417        }
418    }
419
420    fn parse_64(bytes: &[u8]) -> Self {
421        debug_assert!(bytes.len() >= PHDR64_SIZE as usize);
422        Self {
423            p_type: read_u32(bytes, 0),
424            p_flags: read_u32(bytes, 4),
425            p_offset: read_u64(bytes, 8),
426            p_vaddr: read_u64(bytes, 16),
427            p_paddr: read_u64(bytes, 24),
428            p_filesz: read_u64(bytes, 32),
429            p_memsz: read_u64(bytes, 40),
430            p_align: read_u64(bytes, 48),
431        }
432    }
433
434    fn parse_32(bytes: &[u8]) -> Self {
435        // Note the Elf32_Phdr field order differs from Elf64_Phdr:
436        //   type offset vaddr paddr filesz memsz flags align
437        // (whereas Elf64 places `flags` immediately after `type`).
438        debug_assert!(bytes.len() >= PHDR32_SIZE as usize);
439        Self {
440            p_type: read_u32(bytes, 0),
441            p_offset: u64::from(read_u32(bytes, 4)),
442            p_vaddr: u64::from(read_u32(bytes, 8)),
443            p_paddr: u64::from(read_u32(bytes, 12)),
444            p_filesz: u64::from(read_u32(bytes, 16)),
445            p_memsz: u64::from(read_u32(bytes, 20)),
446            p_flags: read_u32(bytes, 24),
447            p_align: u64::from(read_u32(bytes, 28)),
448        }
449    }
450
451    fn write(&self, class: ElfClass, out: &mut [u8]) {
452        match class {
453            ElfClass::Elf64 => self.write_64(out),
454            ElfClass::Elf32 => self.write_32(out),
455        }
456    }
457
458    fn write_64(&self, out: &mut [u8]) {
459        debug_assert!(out.len() >= PHDR64_SIZE as usize);
460        write_u32(out, 0, self.p_type);
461        write_u32(out, 4, self.p_flags);
462        write_u64(out, 8, self.p_offset);
463        write_u64(out, 16, self.p_vaddr);
464        write_u64(out, 24, self.p_paddr);
465        write_u64(out, 32, self.p_filesz);
466        write_u64(out, 40, self.p_memsz);
467        write_u64(out, 48, self.p_align);
468    }
469
470    fn write_32(&self, out: &mut [u8]) {
471        debug_assert!(out.len() >= PHDR32_SIZE as usize);
472        write_u32(out, 0, self.p_type);
473        write_u32(out, 4, self.p_offset as u32);
474        write_u32(out, 8, self.p_vaddr as u32);
475        write_u32(out, 12, self.p_paddr as u32);
476        write_u32(out, 16, self.p_filesz as u32);
477        write_u32(out, 20, self.p_memsz as u32);
478        write_u32(out, 24, self.p_flags);
479        write_u32(out, 28, self.p_align as u32);
480    }
481}
482
/// Parsed section header entry, stored in 64-bit shape for both classes.
///
/// Field names mirror the ELF spec. For ELFCLASS32 inputs every 32-bit
/// flag word, address, offset and size is zero-extended on parse.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Shdr64 {
    /// Offset of the section's name inside the section-name string table.
    pub sh_name: u32,
    /// Section type (compare against the `SHT_*` constants).
    pub sh_type: u32,
    /// Section flags (compare against the `SHF_*` constants).
    pub sh_flags: u64,
    /// Address of the section in memory.
    pub sh_addr: u64,
    /// File offset of the section's contents.
    pub sh_offset: u64,
    /// Size of the section in bytes.
    pub sh_size: u64,
    /// Index of an associated section; meaning depends on `sh_type`.
    pub sh_link: u32,
    /// Extra information; meaning depends on `sh_type`.
    pub sh_info: u32,
    /// Required alignment of the section.
    pub sh_addralign: u64,
    /// Size of one entry, for sections that hold fixed-size entries.
    pub sh_entsize: u64,
}
497
498impl Shdr64 {
499    fn parse(class: ElfClass, bytes: &[u8]) -> Self {
500        match class {
501            ElfClass::Elf64 => Self::parse_64(bytes),
502            ElfClass::Elf32 => Self::parse_32(bytes),
503        }
504    }
505
506    fn parse_64(bytes: &[u8]) -> Self {
507        debug_assert!(bytes.len() >= SHDR64_SIZE as usize);
508        Self {
509            sh_name: read_u32(bytes, 0),
510            sh_type: read_u32(bytes, 4),
511            sh_flags: read_u64(bytes, 8),
512            sh_addr: read_u64(bytes, 16),
513            sh_offset: read_u64(bytes, 24),
514            sh_size: read_u64(bytes, 32),
515            sh_link: read_u32(bytes, 40),
516            sh_info: read_u32(bytes, 44),
517            sh_addralign: read_u64(bytes, 48),
518            sh_entsize: read_u64(bytes, 56),
519        }
520    }
521
522    fn parse_32(bytes: &[u8]) -> Self {
523        // Elf32_Shdr field order matches Elf64_Shdr; only widths differ.
524        debug_assert!(bytes.len() >= SHDR32_SIZE as usize);
525        Self {
526            sh_name: read_u32(bytes, 0),
527            sh_type: read_u32(bytes, 4),
528            sh_flags: u64::from(read_u32(bytes, 8)),
529            sh_addr: u64::from(read_u32(bytes, 12)),
530            sh_offset: u64::from(read_u32(bytes, 16)),
531            sh_size: u64::from(read_u32(bytes, 20)),
532            sh_link: read_u32(bytes, 24),
533            sh_info: read_u32(bytes, 28),
534            sh_addralign: u64::from(read_u32(bytes, 32)),
535            sh_entsize: u64::from(read_u32(bytes, 36)),
536        }
537    }
538
539    fn write(&self, class: ElfClass, out: &mut [u8]) {
540        match class {
541            ElfClass::Elf64 => self.write_64(out),
542            ElfClass::Elf32 => self.write_32(out),
543        }
544    }
545
546    fn write_64(&self, out: &mut [u8]) {
547        debug_assert!(out.len() >= SHDR64_SIZE as usize);
548        write_u32(out, 0, self.sh_name);
549        write_u32(out, 4, self.sh_type);
550        write_u64(out, 8, self.sh_flags);
551        write_u64(out, 16, self.sh_addr);
552        write_u64(out, 24, self.sh_offset);
553        write_u64(out, 32, self.sh_size);
554        write_u32(out, 40, self.sh_link);
555        write_u32(out, 44, self.sh_info);
556        write_u64(out, 48, self.sh_addralign);
557        write_u64(out, 56, self.sh_entsize);
558    }
559
560    fn write_32(&self, out: &mut [u8]) {
561        debug_assert!(out.len() >= SHDR32_SIZE as usize);
562        write_u32(out, 0, self.sh_name);
563        write_u32(out, 4, self.sh_type);
564        write_u32(out, 8, self.sh_flags as u32);
565        write_u32(out, 12, self.sh_addr as u32);
566        write_u32(out, 16, self.sh_offset as u32);
567        write_u32(out, 20, self.sh_size as u32);
568        write_u32(out, 24, self.sh_link);
569        write_u32(out, 28, self.sh_info);
570        write_u32(out, 32, self.sh_addralign as u32);
571        write_u32(out, 36, self.sh_entsize as u32);
572    }
573
574    fn occupies_file(&self) -> bool {
575        self.sh_type != SHT_NOBITS && self.sh_size > 0
576    }
577}
578
579/// A parsed ELF64 file in a form that round-trips byte-identically.
580///
581/// The structured fields (`ehdr`, `phdrs`, `shdrs`) are interpreted; the
582/// raw bytes inside sections and any interstitial padding are stored
583/// verbatim. On `write_to_vec`, the structured fields are reassembled and
584/// the verbatim bytes are dropped back in place at their original offsets.
585#[derive(Debug, Clone)]
586pub struct Elf64File {
587    /// On-disk header layout. Determines whether the headers
588    /// re-emit as 32-bit or 64-bit on serialisation.
589    pub class: ElfClass,
590
591    pub ehdr: Ehdr64,
592    pub phdrs: Vec<Phdr64>,
593    pub shdrs: Vec<Shdr64>,
594
595    /// Section file content, parallel to `shdrs`. Empty for NOBITS or
596    /// zero-size sections.
597    section_data: Vec<Vec<u8>>,
598
599    /// Bytes that fall in the gaps between structured regions (e.g.
600    /// alignment padding between sections). Stored as `(file_offset, bytes)`.
601    padding: Vec<(u64, Vec<u8>)>,
602
603    /// Total size of the file, in bytes.
604    file_size: u64,
605}
606
607/// Returns true if `bytes` start with the ELF magic.
608///
609/// This says nothing about class (32 vs 64) or endianness — a true return
610/// means *some* flavor of ELF, not necessarily one this crate supports.
611#[must_use]
612pub fn is_elf(bytes: &[u8]) -> bool {
613    bytes.len() >= 4 && bytes[..4] == ELFMAG
614}
615
616/// Returns true iff `bytes` are an ELF little-endian image of either
617/// class — the flavors [`Elf64File::parse`] handles. Callers that
618/// route by format (e.g. the CLI's round-trip pipeline) should gate
619/// on this and fall through to a byte-copy for unsupported variants
620/// so the round-trip contract still holds.
621#[must_use]
622pub fn is_elf64_le(bytes: &[u8]) -> bool {
623    bytes.len() >= 6
624        && bytes[..4] == ELFMAG
625        && (bytes[4] == ELFCLASS32 || bytes[4] == ELFCLASS64)
626        && bytes[5] == ELFDATA2LSB
627}
628
629impl Elf64File {
630    /// Parse an ELF LE file (either ELFCLASS32 or ELFCLASS64) into a
631    /// structure that round-trips byte-identically.
632    pub fn parse(bytes: &[u8]) -> Result<Self> {
633        let (ehdr, class) = Ehdr64::parse(bytes)?;
634
635        let phdrs = Self::parse_phdrs(class, bytes, &ehdr)?;
636        let (shdrs, section_data) = Self::parse_shdrs_and_sections(class, bytes, &ehdr)?;
637
638        let regions = build_regions(class, &ehdr, &shdrs)?;
639        let padding = compute_padding(bytes, &regions);
640
641        Ok(Self {
642            class,
643            ehdr,
644            phdrs,
645            shdrs,
646            section_data,
647            padding,
648            file_size: bytes.len() as u64,
649        })
650    }
651
652    fn parse_phdrs(class: ElfClass, bytes: &[u8], ehdr: &Ehdr64) -> Result<Vec<Phdr64>> {
653        let count = ehdr.e_phnum as usize;
654        if count == 0 {
655            return Ok(Vec::new());
656        }
657        let entry_size = phdr_size(class) as usize;
658        let total = count
659            .checked_mul(entry_size)
660            .ok_or_else(|| Error::RegionOverflow {
661                label: "program-header table".into(),
662                offset: ehdr.e_phoff,
663                size: count as u64 * entry_size as u64,
664            })?;
665        ensure_len(bytes, ehdr.e_phoff, total as u64)?;
666        let start = ehdr.e_phoff as usize;
667        let mut phdrs = Vec::with_capacity(count);
668        for i in 0..count {
669            let off = start + i * entry_size;
670            phdrs.push(Phdr64::parse(class, &bytes[off..off + entry_size]));
671        }
672        Ok(phdrs)
673    }
674
675    fn parse_shdrs_and_sections(
676        class: ElfClass,
677        bytes: &[u8],
678        ehdr: &Ehdr64,
679    ) -> Result<(Vec<Shdr64>, Vec<Vec<u8>>)> {
680        let count = ehdr.e_shnum as usize;
681        if count == 0 {
682            return Ok((Vec::new(), Vec::new()));
683        }
684        let entry_size = shdr_size(class) as usize;
685        let total = count
686            .checked_mul(entry_size)
687            .ok_or_else(|| Error::RegionOverflow {
688                label: "section-header table".into(),
689                offset: ehdr.e_shoff,
690                size: count as u64 * entry_size as u64,
691            })?;
692        ensure_len(bytes, ehdr.e_shoff, total as u64)?;
693        let start = ehdr.e_shoff as usize;
694
695        let mut shdrs = Vec::with_capacity(count);
696        let mut section_data = Vec::with_capacity(count);
697        for i in 0..count {
698            let off = start + i * entry_size;
699            let sh = Shdr64::parse(class, &bytes[off..off + entry_size]);
700            if sh.occupies_file() {
701                ensure_len(bytes, sh.sh_offset, sh.sh_size)?;
702                let data_off = sh.sh_offset as usize;
703                let data_end = data_off + sh.sh_size as usize;
704                section_data.push(bytes[data_off..data_end].to_vec());
705            } else {
706                section_data.push(Vec::new());
707            }
708            shdrs.push(sh);
709        }
710        Ok((shdrs, section_data))
711    }
712
713    /// Raw on-disk bytes of the section at index `idx`, parallel to
714    /// [`Self::shdrs`]. Returns an empty slice for NOBITS or zero-size
715    /// sections. Returns `None` only for an out-of-range index.
716    #[must_use]
717    pub fn section_data(&self, idx: usize) -> Option<&[u8]> {
718        self.section_data.get(idx).map(Vec::as_slice)
719    }
720
721    /// Construct an [`Elf64File`] from already-parsed parts.
722    ///
723    /// Used by reconstructive code paths (such as `ud-compile`'s lower
724    /// path) that build the file's structure from a `.ud` AST rather
725    /// than from on-disk bytes. The caller is responsible for keeping
726    /// the parts consistent: `section_data` must be parallel to
727    /// `shdrs`, `padding` must cover every gap between structured
728    /// regions, and `file_size` must equal the total covered.
729    /// [`write_to_vec`](Self::write_to_vec) does no validation; it
730    /// assumes consistency.
731    #[must_use]
732    pub fn from_parts(
733        class: ElfClass,
734        ehdr: Ehdr64,
735        phdrs: Vec<Phdr64>,
736        shdrs: Vec<Shdr64>,
737        section_data: Vec<Vec<u8>>,
738        padding: Vec<(u64, Vec<u8>)>,
739        file_size: u64,
740    ) -> Self {
741        Self {
742            class,
743            ehdr,
744            phdrs,
745            shdrs,
746            section_data,
747            padding,
748            file_size,
749        }
750    }
751
752    /// Total size of the underlying file in bytes.
753    #[must_use]
754    pub fn file_size(&self) -> u64 {
755        self.file_size
756    }
757
758    /// All padding regions captured between structured regions.
759    /// Returns `(file_offset, bytes)` pairs in offset order.
760    #[must_use]
761    pub fn padding(&self) -> &[(u64, Vec<u8>)] {
762        &self.padding
763    }
764
765    /// Iterator over `(index, &Shdr64, &[u8])` for every section.
766    pub fn sections(&self) -> impl Iterator<Item = (usize, &Shdr64, &[u8])> {
767        self.shdrs
768            .iter()
769            .zip(&self.section_data)
770            .enumerate()
771            .map(|(i, (sh, data))| (i, sh, data.as_slice()))
772    }
773
774    /// Resolve the section's name through the section-header string
775    /// table (`.shstrtab`, indexed by `e_shstrndx`).
776    ///
777    /// Returns `None` if the section index is out of range, the
778    /// `e_shstrndx` points outside the section table, the name offset
779    /// is past the end of `.shstrtab`, or the bytes aren't valid UTF-8
780    /// (which would indicate a malformed or non-standard ELF; real
781    /// toolchains write ASCII section names).
782    #[must_use]
783    pub fn section_name(&self, idx: usize) -> Option<&str> {
784        let shstrtab = self.section_data(self.ehdr.e_shstrndx as usize)?;
785        let sh = self.shdrs.get(idx)?;
786        let start = sh.sh_name as usize;
787        let tail = shstrtab.get(start..)?;
788        let nul = tail.iter().position(|&b| b == 0)?;
789        std::str::from_utf8(&tail[..nul]).ok()
790    }
791
792    /// Find the first section with the given name.
793    ///
794    /// Iterates section headers in order, so for ELFs with multiple
795    /// sections sharing a name (rare but legal) the lowest-indexed one
796    /// wins.
797    #[must_use]
798    pub fn section_by_name(&self, name: &str) -> Option<(usize, &Shdr64, &[u8])> {
799        for (i, sh, data) in self.sections() {
800            if self.section_name(i) == Some(name) {
801                return Some((i, sh, data));
802            }
803        }
804        None
805    }
806
807    /// Serialize the parsed file back to bytes. For any input parsed from
808    /// real bytes, the output is byte-identical to the input.
809    #[must_use]
810    pub fn write_to_vec(&self) -> Vec<u8> {
811        let mut out = vec![0u8; self.file_size as usize];
812
813        let class = self.class;
814        self.ehdr
815            .write(class, &mut out[..ehdr_size(class) as usize]);
816
817        if !self.phdrs.is_empty() {
818            let start = self.ehdr.e_phoff as usize;
819            let entry_size = phdr_size(class) as usize;
820            for (i, ph) in self.phdrs.iter().enumerate() {
821                let off = start + i * entry_size;
822                ph.write(class, &mut out[off..off + entry_size]);
823            }
824        }
825
826        if !self.shdrs.is_empty() {
827            let start = self.ehdr.e_shoff as usize;
828            let entry_size = shdr_size(class) as usize;
829            for (i, sh) in self.shdrs.iter().enumerate() {
830                let off = start + i * entry_size;
831                sh.write(class, &mut out[off..off + entry_size]);
832            }
833        }
834
835        for (sh, data) in self.shdrs.iter().zip(&self.section_data) {
836            if sh.occupies_file() {
837                let off = sh.sh_offset as usize;
838                out[off..off + data.len()].copy_from_slice(data);
839            }
840        }
841
842        for (offset, bytes) in &self.padding {
843            let off = *offset as usize;
844            out[off..off + bytes.len()].copy_from_slice(bytes);
845        }
846
847        out
848    }
849}
850
/// One "structured" span of the file, i.e. bytes the parser tracks by
/// interpreting them rather than copying them blindly.
#[derive(Debug, Clone)]
struct Region {
    /// Human-readable description used in error messages.
    label: String,
    /// Half-open range of file offsets covered by the region.
    range: Range<u64>,
}
857
858fn build_regions(class: ElfClass, ehdr: &Ehdr64, shdrs: &[Shdr64]) -> Result<Vec<Region>> {
859    let mut regions = Vec::new();
860
861    regions.push(Region {
862        label: "ELF header".into(),
863        range: 0..u64::from(ehdr_size(class)),
864    });
865
866    if ehdr.e_phnum > 0 {
867        let size = u64::from(ehdr.e_phnum) * u64::from(phdr_size(class));
868        let end = ehdr
869            .e_phoff
870            .checked_add(size)
871            .ok_or_else(|| Error::RegionOverflow {
872                label: "program-header table".into(),
873                offset: ehdr.e_phoff,
874                size,
875            })?;
876        regions.push(Region {
877            label: "program-header table".into(),
878            range: ehdr.e_phoff..end,
879        });
880    }
881
882    if ehdr.e_shnum > 0 {
883        let size = u64::from(ehdr.e_shnum) * u64::from(shdr_size(class));
884        let end = ehdr
885            .e_shoff
886            .checked_add(size)
887            .ok_or_else(|| Error::RegionOverflow {
888                label: "section-header table".into(),
889                offset: ehdr.e_shoff,
890                size,
891            })?;
892        regions.push(Region {
893            label: "section-header table".into(),
894            range: ehdr.e_shoff..end,
895        });
896    }
897
898    for (i, sh) in shdrs.iter().enumerate() {
899        if !sh.occupies_file() {
900            continue;
901        }
902        let end = sh
903            .sh_offset
904            .checked_add(sh.sh_size)
905            .ok_or_else(|| Error::RegionOverflow {
906                label: format!("section #{i}"),
907                offset: sh.sh_offset,
908                size: sh.sh_size,
909            })?;
910        regions.push(Region {
911            label: format!("section #{i}"),
912            range: sh.sh_offset..end,
913        });
914    }
915
916    regions.sort_by_key(|r| r.range.start);
917
918    for pair in regions.windows(2) {
919        let a = &pair[0];
920        let b = &pair[1];
921        if a.range.end > b.range.start {
922            return Err(Error::OverlappingRegions {
923                a_label: a.label.clone(),
924                a_start: a.range.start,
925                a_end: a.range.end,
926                b_label: b.label.clone(),
927                b_start: b.range.start,
928                b_end: b.range.end,
929            });
930        }
931    }
932
933    Ok(regions)
934}
935
936fn compute_padding(bytes: &[u8], regions: &[Region]) -> Vec<(u64, Vec<u8>)> {
937    let mut padding = Vec::new();
938    let file_end = bytes.len() as u64;
939    let mut cursor = 0u64;
940    for region in regions {
941        if region.range.start > cursor {
942            let start = cursor as usize;
943            let end = region.range.start as usize;
944            padding.push((cursor, bytes[start..end].to_vec()));
945        }
946        cursor = cursor.max(region.range.end);
947    }
948    if cursor < file_end {
949        let start = cursor as usize;
950        let end = file_end as usize;
951        padding.push((cursor, bytes[start..end].to_vec()));
952    }
953    padding
954}
955
956fn ensure_len(bytes: &[u8], offset: u64, needed: u64) -> Result<()> {
957    let have = bytes.len() as u64;
958    let end = offset.checked_add(needed).ok_or(Error::Truncated {
959        offset,
960        needed,
961        have,
962    })?;
963    if end > have {
964        return Err(Error::Truncated {
965            offset,
966            needed,
967            have,
968        });
969    }
970    Ok(())
971}
972
/// Decodes the little-endian `u16` stored at `bytes[at..at + 2]`.
///
/// Panics if that range is not inside `bytes`.
fn read_u16(bytes: &[u8], at: usize) -> u16 {
    let mut raw = [0u8; 2];
    // The range is exactly as long as `raw`, so the copy cannot mismatch.
    raw.copy_from_slice(&bytes[at..at + 2]);
    u16::from_le_bytes(raw)
}
976
/// Decodes the little-endian `u32` stored at `bytes[at..at + 4]`.
///
/// Panics if that range is not inside `bytes`.
fn read_u32(bytes: &[u8], at: usize) -> u32 {
    let mut raw = [0u8; 4];
    // The range is exactly as long as `raw`, so the copy cannot mismatch.
    raw.copy_from_slice(&bytes[at..at + 4]);
    u32::from_le_bytes(raw)
}
980
/// Decodes the little-endian `u64` stored at `bytes[at..at + 8]`.
///
/// Panics if that range is not inside `bytes`.
fn read_u64(bytes: &[u8], at: usize) -> u64 {
    let mut raw = [0u8; 8];
    // The range is exactly as long as `raw`, so the copy cannot mismatch.
    raw.copy_from_slice(&bytes[at..at + 8]);
    u64::from_le_bytes(raw)
}
984
/// Stores `value` as little-endian into `bytes[at..at + 2]`.
///
/// Panics if that range is not inside `bytes`.
fn write_u16(bytes: &mut [u8], at: usize, value: u16) {
    let encoded = value.to_le_bytes();
    let dest = &mut bytes[at..at + 2];
    dest.copy_from_slice(&encoded);
}
988
/// Stores `value` as little-endian into `bytes[at..at + 4]`.
///
/// Panics if that range is not inside `bytes`.
fn write_u32(bytes: &mut [u8], at: usize, value: u32) {
    let encoded = value.to_le_bytes();
    let dest = &mut bytes[at..at + 4];
    dest.copy_from_slice(&encoded);
}
992
/// Stores `value` as little-endian into `bytes[at..at + 8]`.
///
/// Panics if that range is not inside `bytes`.
fn write_u64(bytes: &mut [u8], at: usize, value: u64) {
    let encoded = value.to_le_bytes();
    let dest = &mut bytes[at..at + 8];
    dest.copy_from_slice(&encoded);
}
996
#[cfg(test)]
mod tests {
    use super::*;

    // Byte offsets of the ELF64 header fields these tests poke at.
    const OFF_EI_MAG0: usize = 0;
    const OFF_EI_CLASS: usize = 4;
    const OFF_EI_DATA: usize = 5;
    const OFF_EI_VERSION: usize = 6;
    const OFF_E_VERSION: usize = 20;
    const OFF_E_PHOFF: usize = 32;
    const OFF_E_EHSIZE: usize = 52;
    const OFF_E_PHENTSIZE: usize = 54;
    const OFF_E_PHNUM: usize = 56;

    /// Overwrites `image[at..]` with the given already-encoded bytes.
    fn patch(image: &mut [u8], at: usize, le_bytes: &[u8]) {
        image[at..at + le_bytes.len()].copy_from_slice(le_bytes);
    }

    /// Builds a 64-bit little-endian image consisting of an ELF header only:
    /// no program headers, no section headers.
    fn minimal_ehdr_bytes() -> Vec<u8> {
        let mut image = vec![0u8; EHDR64_SIZE as usize];
        patch(&mut image, OFF_EI_MAG0, &ELFMAG);
        image[OFF_EI_CLASS] = ELFCLASS64;
        image[OFF_EI_DATA] = ELFDATA2LSB;
        image[OFF_EI_VERSION] = 1; // EV_CURRENT
        // e_type = ET_NONE and e_machine = 0 stay zeroed; e_version = 1.
        patch(&mut image, OFF_E_VERSION, &1u32.to_le_bytes());
        // e_ehsize = 64
        patch(&mut image, OFF_E_EHSIZE, &EHDR64_SIZE.to_le_bytes());
        // e_phnum = 0, e_shnum = 0 → e_phentsize/e_shentsize unchecked
        image
    }

    #[test]
    fn rejects_non_elf() {
        let mut image = minimal_ehdr_bytes();
        image[OFF_EI_MAG0] = 0xff;
        let failure = Elf64File::parse(&image).unwrap_err();
        assert!(matches!(failure, Error::BadMagic(_)));
    }

    #[test]
    fn rejects_unknown_class() {
        let mut image = minimal_ehdr_bytes();
        // Bogus class — neither ELFCLASS32 nor ELFCLASS64.
        image[OFF_EI_CLASS] = 7;
        let failure = Elf64File::parse(&image).unwrap_err();
        assert!(matches!(failure, Error::UnsupportedClass(7)));
    }

    #[test]
    fn rejects_big_endian() {
        let mut image = minimal_ehdr_bytes();
        image[OFF_EI_DATA] = 2; // ELFDATA2MSB
        let failure = Elf64File::parse(&image).unwrap_err();
        assert!(matches!(failure, Error::UnsupportedEncoding(2)));
    }

    #[test]
    fn parses_minimal_ehdr_only() {
        let image = minimal_ehdr_bytes();
        let parsed = Elf64File::parse(&image).expect("minimal ehdr should parse");
        assert_eq!(parsed.ehdr.e_ehsize, EHDR64_SIZE);
        assert!(parsed.phdrs.is_empty());
        assert!(parsed.shdrs.is_empty());
        // Round-trip: serialising must reproduce the input byte for byte.
        assert_eq!(parsed.write_to_vec(), image);
    }

    #[test]
    fn detects_truncation_in_phdrs() {
        let mut image = minimal_ehdr_bytes();
        patch(&mut image, OFF_E_PHNUM, &1u16.to_le_bytes()); // e_phnum = 1
        patch(&mut image, OFF_E_PHENTSIZE, &PHDR64_SIZE.to_le_bytes());
        patch(&mut image, OFF_E_PHOFF, &64u64.to_le_bytes()); // e_phoff = 64
        // The file ends at 64 → no room for the phdr.
        let failure = Elf64File::parse(&image).unwrap_err();
        assert!(matches!(failure, Error::Truncated { .. }));
    }
}