Skip to main content

ud_format/
elf.rs

1//! ELF reader and writer with byte-identical round-trip.
2//!
3//! Handles both ELFCLASS32 and ELFCLASS64 little-endian images. The
4//! parsed representation always uses 64-bit-shaped headers (`Ehdr64`,
5//! `Phdr64`, `Shdr64`) regardless of input class — 32-bit fields are
6//! zero-extended on parse and truncated on write. The on-disk format
7//! is recorded in [`Elf64File::class`] and used to dispatch the right
8//! header layout when serialising.
9//!
10//! The contract: for any supported input `bytes`,
11//! `Elf64File::parse(bytes)?.write_to_vec() == bytes`.
12//!
13//! Anything not in scope for this crate is preserved as opaque bytes and
14//! re-emitted verbatim. Section *contents* (bytes inside `.text`, `.rodata`,
15//! `.symtab`, etc.) are never interpreted here — that belongs to the arch
16//! backends and analysis crates.
17
18#![allow(clippy::cast_possible_truncation)]
19
20use std::ops::Range;
21
/// Length in bytes of the `e_ident` array that opens every ELF image.
const EI_NIDENT: usize = 16;

/// The four magic bytes (`\x7fELF`) found at the very start of `e_ident`.
const ELFMAG: [u8; 4] = *b"\x7fELF";

/// Value of `e_ident[EI_CLASS]` that marks a 32-bit object.
const ELFCLASS32: u8 = 0x01;

/// Value of `e_ident[EI_CLASS]` that marks a 64-bit object.
const ELFCLASS64: u8 = 0x02;

/// Value of `e_ident[EI_DATA]` that marks 2's complement little-endian data.
const ELFDATA2LSB: u8 = 0x01;
36
/// Header width of the on-disk image: 32-bit or 64-bit.
///
/// Captured while parsing and looked up again when writing, so the
/// original byte layout is reproduced exactly.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ElfClass {
    /// The image stores 32-bit headers.
    Elf32,
    /// The image stores 64-bit headers.
    Elf64,
}
45
/// `sh_type` of a fully-linked symbol table.
pub const SHT_SYMTAB: u32 = 2;

/// `sh_type` of a string table.
pub const SHT_STRTAB: u32 = 3;

/// `sh_type` of a relocation table whose entries carry explicit addends
/// (`Elf64_Rela`).
pub const SHT_RELA: u32 = 4;

/// `sh_type` of a section that takes up no space in the file (e.g. `.bss`).
const SHT_NOBITS: u32 = 8;

/// `sh_type` of the dynamic-linking symbol table (always present in dynamic
/// executables and shared objects).
pub const SHT_DYNSYM: u32 = 11;

/// `sh_flags` bit set on sections that contain executable instructions.
pub const SHF_EXECINSTR: u64 = 1 << 2;

/// `e_machine` value identifying i386 (32-bit x86).
pub const EM_386: u16 = 3;

/// `e_machine` value identifying x86-64.
pub const EM_X86_64: u16 = 62;

/// `e_machine` value identifying `AArch64`.
pub const EM_AARCH64: u16 = 183;
73
/// Bytes occupied on disk by an ELF32 ELF header.
const EHDR32_SIZE: u16 = 52;

/// Bytes occupied on disk by one ELF32 program header entry.
const PHDR32_SIZE: u16 = 32;

/// Bytes occupied on disk by one ELF32 section header entry.
const SHDR32_SIZE: u16 = 40;

/// Bytes occupied on disk by an ELF64 ELF header.
const EHDR64_SIZE: u16 = 64;

/// Bytes occupied on disk by one ELF64 program header entry.
const PHDR64_SIZE: u16 = 56;

/// Bytes occupied on disk by one ELF64 section header entry.
const SHDR64_SIZE: u16 = 64;
91
92const fn ehdr_size(class: ElfClass) -> u16 {
93    match class {
94        ElfClass::Elf32 => EHDR32_SIZE,
95        ElfClass::Elf64 => EHDR64_SIZE,
96    }
97}
98
99const fn phdr_size(class: ElfClass) -> u16 {
100    match class {
101        ElfClass::Elf32 => PHDR32_SIZE,
102        ElfClass::Elf64 => PHDR64_SIZE,
103    }
104}
105
106const fn shdr_size(class: ElfClass) -> u16 {
107    match class {
108        ElfClass::Elf32 => SHDR32_SIZE,
109        ElfClass::Elf64 => SHDR64_SIZE,
110    }
111}
112
113/// Errors surfaced when parsing or writing an ELF64 file.
114#[derive(Debug, thiserror::Error)]
115pub enum Error {
116    #[error("file too short: needed {needed} bytes at offset {offset}, have {have}")]
117    Truncated { offset: u64, needed: u64, have: u64 },
118
119    #[error("not an ELF file: bad magic {0:02x?}")]
120    BadMagic([u8; 4]),
121
122    #[error("unsupported ELF class: {0} (only ELFCLASS32 = 1 and ELFCLASS64 = 2 are implemented)")]
123    UnsupportedClass(u8),
124
125    #[error("unsupported ELF data encoding: {0} (only ELFDATA2LSB = 1 is implemented)")]
126    UnsupportedEncoding(u8),
127
128    #[error("unexpected e_ehsize: header says {got}, on-disk ELF64 size is {expected}")]
129    BadEhsize { got: u16, expected: u16 },
130
131    #[error("unexpected e_phentsize: header says {got}, on-disk ELF64 phdr size is {expected}")]
132    BadPhentsize { got: u16, expected: u16 },
133
134    #[error("unexpected e_shentsize: header says {got}, on-disk ELF64 shdr size is {expected}")]
135    BadShentsize { got: u16, expected: u16 },
136
137    #[error(
138        "structured regions overlap: {a_label} at {a_start}..{a_end} vs {b_label} at {b_start}..{b_end}"
139    )]
140    OverlappingRegions {
141        a_label: String,
142        a_start: u64,
143        a_end: u64,
144        b_label: String,
145        b_start: u64,
146        b_end: u64,
147    },
148
149    #[error("integer overflow computing region end for {label} at offset {offset} size {size}")]
150    RegionOverflow {
151        label: String,
152        offset: u64,
153        size: u64,
154    },
155}
156
157pub type Result<T, E = Error> = std::result::Result<T, E>;
158
159/// Parsed ELF64 ELF header.
160///
161/// Field names mirror the ELF spec verbatim. The struct is public so
162/// downstream crates can read these for analysis; mutation through public
163/// fields is *not* part of a stability contract yet — invariants like
164/// `e_ehsize == EHDR64_SIZE` are enforced only at parse time.
165#[derive(Debug, Clone, PartialEq, Eq)]
166pub struct Ehdr64 {
167    pub e_ident: [u8; EI_NIDENT],
168    pub e_type: u16,
169    pub e_machine: u16,
170    pub e_version: u32,
171    pub e_entry: u64,
172    pub e_phoff: u64,
173    pub e_shoff: u64,
174    pub e_flags: u32,
175    pub e_ehsize: u16,
176    pub e_phentsize: u16,
177    pub e_phnum: u16,
178    pub e_shentsize: u16,
179    pub e_shnum: u16,
180    pub e_shstrndx: u16,
181}
182
183impl Ehdr64 {
184    fn parse(bytes: &[u8]) -> Result<(Self, ElfClass)> {
185        if bytes.len() < EI_NIDENT {
186            return Err(Error::Truncated {
187                offset: 0,
188                needed: EI_NIDENT as u64,
189                have: bytes.len() as u64,
190            });
191        }
192        let mut e_ident = [0u8; EI_NIDENT];
193        e_ident.copy_from_slice(&bytes[..EI_NIDENT]);
194
195        if e_ident[0..4] != ELFMAG {
196            let mut bad = [0u8; 4];
197            bad.copy_from_slice(&e_ident[0..4]);
198            return Err(Error::BadMagic(bad));
199        }
200        if e_ident[5] != ELFDATA2LSB {
201            return Err(Error::UnsupportedEncoding(e_ident[5]));
202        }
203
204        let class = match e_ident[4] {
205            ELFCLASS32 => ElfClass::Elf32,
206            ELFCLASS64 => ElfClass::Elf64,
207            other => return Err(Error::UnsupportedClass(other)),
208        };
209
210        match class {
211            ElfClass::Elf32 => Self::parse_32(bytes, e_ident).map(|h| (h, class)),
212            ElfClass::Elf64 => Self::parse_64(bytes, e_ident).map(|h| (h, class)),
213        }
214    }
215
216    fn parse_64(bytes: &[u8], e_ident: [u8; EI_NIDENT]) -> Result<Self> {
217        ensure_len(bytes, 0, EHDR64_SIZE.into())?;
218        let e_type = read_u16(bytes, 16);
219        let e_machine = read_u16(bytes, 18);
220        let e_version = read_u32(bytes, 20);
221        let e_entry = read_u64(bytes, 24);
222        let e_phoff = read_u64(bytes, 32);
223        let e_shoff = read_u64(bytes, 40);
224        let e_flags = read_u32(bytes, 48);
225        let e_ehsize = read_u16(bytes, 52);
226        let e_phentsize = read_u16(bytes, 54);
227        let e_phnum = read_u16(bytes, 56);
228        let e_shentsize = read_u16(bytes, 58);
229        let e_shnum = read_u16(bytes, 60);
230        let e_shstrndx = read_u16(bytes, 62);
231
232        if e_ehsize != EHDR64_SIZE {
233            return Err(Error::BadEhsize {
234                got: e_ehsize,
235                expected: EHDR64_SIZE,
236            });
237        }
238        if e_phnum > 0 && e_phentsize != PHDR64_SIZE {
239            return Err(Error::BadPhentsize {
240                got: e_phentsize,
241                expected: PHDR64_SIZE,
242            });
243        }
244        if e_shnum > 0 && e_shentsize != SHDR64_SIZE {
245            return Err(Error::BadShentsize {
246                got: e_shentsize,
247                expected: SHDR64_SIZE,
248            });
249        }
250
251        Ok(Self {
252            e_ident,
253            e_type,
254            e_machine,
255            e_version,
256            e_entry,
257            e_phoff,
258            e_shoff,
259            e_flags,
260            e_ehsize,
261            e_phentsize,
262            e_phnum,
263            e_shentsize,
264            e_shnum,
265            e_shstrndx,
266        })
267    }
268
269    fn parse_32(bytes: &[u8], e_ident: [u8; EI_NIDENT]) -> Result<Self> {
270        ensure_len(bytes, 0, EHDR32_SIZE.into())?;
271        let e_type = read_u16(bytes, 16);
272        let e_machine = read_u16(bytes, 18);
273        let e_version = read_u32(bytes, 20);
274        let e_entry = u64::from(read_u32(bytes, 24));
275        let e_phoff = u64::from(read_u32(bytes, 28));
276        let e_shoff = u64::from(read_u32(bytes, 32));
277        let e_flags = read_u32(bytes, 36);
278        let e_ehsize = read_u16(bytes, 40);
279        let e_phentsize = read_u16(bytes, 42);
280        let e_phnum = read_u16(bytes, 44);
281        let e_shentsize = read_u16(bytes, 46);
282        let e_shnum = read_u16(bytes, 48);
283        let e_shstrndx = read_u16(bytes, 50);
284
285        if e_ehsize != EHDR32_SIZE {
286            return Err(Error::BadEhsize {
287                got: e_ehsize,
288                expected: EHDR32_SIZE,
289            });
290        }
291        if e_phnum > 0 && e_phentsize != PHDR32_SIZE {
292            return Err(Error::BadPhentsize {
293                got: e_phentsize,
294                expected: PHDR32_SIZE,
295            });
296        }
297        if e_shnum > 0 && e_shentsize != SHDR32_SIZE {
298            return Err(Error::BadShentsize {
299                got: e_shentsize,
300                expected: SHDR32_SIZE,
301            });
302        }
303
304        Ok(Self {
305            e_ident,
306            e_type,
307            e_machine,
308            e_version,
309            e_entry,
310            e_phoff,
311            e_shoff,
312            e_flags,
313            e_ehsize,
314            e_phentsize,
315            e_phnum,
316            e_shentsize,
317            e_shnum,
318            e_shstrndx,
319        })
320    }
321
322    fn write(&self, class: ElfClass, out: &mut [u8]) {
323        match class {
324            ElfClass::Elf64 => self.write_64(out),
325            ElfClass::Elf32 => self.write_32(out),
326        }
327    }
328
329    fn write_64(&self, out: &mut [u8]) {
330        debug_assert!(out.len() >= EHDR64_SIZE as usize);
331        out[..EI_NIDENT].copy_from_slice(&self.e_ident);
332        write_u16(out, 16, self.e_type);
333        write_u16(out, 18, self.e_machine);
334        write_u32(out, 20, self.e_version);
335        write_u64(out, 24, self.e_entry);
336        write_u64(out, 32, self.e_phoff);
337        write_u64(out, 40, self.e_shoff);
338        write_u32(out, 48, self.e_flags);
339        write_u16(out, 52, self.e_ehsize);
340        write_u16(out, 54, self.e_phentsize);
341        write_u16(out, 56, self.e_phnum);
342        write_u16(out, 58, self.e_shentsize);
343        write_u16(out, 60, self.e_shnum);
344        write_u16(out, 62, self.e_shstrndx);
345    }
346
347    fn write_32(&self, out: &mut [u8]) {
348        debug_assert!(out.len() >= EHDR32_SIZE as usize);
349        out[..EI_NIDENT].copy_from_slice(&self.e_ident);
350        write_u16(out, 16, self.e_type);
351        write_u16(out, 18, self.e_machine);
352        write_u32(out, 20, self.e_version);
353        write_u32(out, 24, self.e_entry as u32);
354        write_u32(out, 28, self.e_phoff as u32);
355        write_u32(out, 32, self.e_shoff as u32);
356        write_u32(out, 36, self.e_flags);
357        write_u16(out, 40, self.e_ehsize);
358        write_u16(out, 42, self.e_phentsize);
359        write_u16(out, 44, self.e_phnum);
360        write_u16(out, 46, self.e_shentsize);
361        write_u16(out, 48, self.e_shnum);
362        write_u16(out, 50, self.e_shstrndx);
363    }
364}
365
/// One program header entry in its parsed, 64-bit-shaped form.
///
/// For ELF32 input every field wider than 32 bits here was zero-extended
/// on parse and is truncated again on write.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Phdr64 {
    pub p_type: u32,
    pub p_flags: u32,
    pub p_offset: u64,
    pub p_vaddr: u64,
    pub p_paddr: u64,
    pub p_filesz: u64,
    pub p_memsz: u64,
    pub p_align: u64,
}
378
379impl Phdr64 {
380    fn parse(class: ElfClass, bytes: &[u8]) -> Self {
381        match class {
382            ElfClass::Elf64 => Self::parse_64(bytes),
383            ElfClass::Elf32 => Self::parse_32(bytes),
384        }
385    }
386
387    fn parse_64(bytes: &[u8]) -> Self {
388        debug_assert!(bytes.len() >= PHDR64_SIZE as usize);
389        Self {
390            p_type: read_u32(bytes, 0),
391            p_flags: read_u32(bytes, 4),
392            p_offset: read_u64(bytes, 8),
393            p_vaddr: read_u64(bytes, 16),
394            p_paddr: read_u64(bytes, 24),
395            p_filesz: read_u64(bytes, 32),
396            p_memsz: read_u64(bytes, 40),
397            p_align: read_u64(bytes, 48),
398        }
399    }
400
401    fn parse_32(bytes: &[u8]) -> Self {
402        // Note the Elf32_Phdr field order differs from Elf64_Phdr:
403        //   type offset vaddr paddr filesz memsz flags align
404        // (whereas Elf64 places `flags` immediately after `type`).
405        debug_assert!(bytes.len() >= PHDR32_SIZE as usize);
406        Self {
407            p_type: read_u32(bytes, 0),
408            p_offset: u64::from(read_u32(bytes, 4)),
409            p_vaddr: u64::from(read_u32(bytes, 8)),
410            p_paddr: u64::from(read_u32(bytes, 12)),
411            p_filesz: u64::from(read_u32(bytes, 16)),
412            p_memsz: u64::from(read_u32(bytes, 20)),
413            p_flags: read_u32(bytes, 24),
414            p_align: u64::from(read_u32(bytes, 28)),
415        }
416    }
417
418    fn write(&self, class: ElfClass, out: &mut [u8]) {
419        match class {
420            ElfClass::Elf64 => self.write_64(out),
421            ElfClass::Elf32 => self.write_32(out),
422        }
423    }
424
425    fn write_64(&self, out: &mut [u8]) {
426        debug_assert!(out.len() >= PHDR64_SIZE as usize);
427        write_u32(out, 0, self.p_type);
428        write_u32(out, 4, self.p_flags);
429        write_u64(out, 8, self.p_offset);
430        write_u64(out, 16, self.p_vaddr);
431        write_u64(out, 24, self.p_paddr);
432        write_u64(out, 32, self.p_filesz);
433        write_u64(out, 40, self.p_memsz);
434        write_u64(out, 48, self.p_align);
435    }
436
437    fn write_32(&self, out: &mut [u8]) {
438        debug_assert!(out.len() >= PHDR32_SIZE as usize);
439        write_u32(out, 0, self.p_type);
440        write_u32(out, 4, self.p_offset as u32);
441        write_u32(out, 8, self.p_vaddr as u32);
442        write_u32(out, 12, self.p_paddr as u32);
443        write_u32(out, 16, self.p_filesz as u32);
444        write_u32(out, 20, self.p_memsz as u32);
445        write_u32(out, 24, self.p_flags);
446        write_u32(out, 28, self.p_align as u32);
447    }
448}
449
/// One section header entry in its parsed, 64-bit-shaped form.
///
/// For ELF32 input every field wider than 32 bits here was zero-extended
/// on parse and is truncated again on write.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Shdr64 {
    /// Offset of the section's name inside the section-name string table.
    pub sh_name: u32,
    /// Section kind; compare against the `SHT_*` constants.
    pub sh_type: u32,
    /// Attribute bits; see the `SHF_*` constants.
    pub sh_flags: u64,
    pub sh_addr: u64,
    /// File offset of the section's bytes.
    pub sh_offset: u64,
    /// Size of the section in bytes.
    pub sh_size: u64,
    pub sh_link: u32,
    pub sh_info: u32,
    pub sh_addralign: u64,
    pub sh_entsize: u64,
}
464
465impl Shdr64 {
466    fn parse(class: ElfClass, bytes: &[u8]) -> Self {
467        match class {
468            ElfClass::Elf64 => Self::parse_64(bytes),
469            ElfClass::Elf32 => Self::parse_32(bytes),
470        }
471    }
472
473    fn parse_64(bytes: &[u8]) -> Self {
474        debug_assert!(bytes.len() >= SHDR64_SIZE as usize);
475        Self {
476            sh_name: read_u32(bytes, 0),
477            sh_type: read_u32(bytes, 4),
478            sh_flags: read_u64(bytes, 8),
479            sh_addr: read_u64(bytes, 16),
480            sh_offset: read_u64(bytes, 24),
481            sh_size: read_u64(bytes, 32),
482            sh_link: read_u32(bytes, 40),
483            sh_info: read_u32(bytes, 44),
484            sh_addralign: read_u64(bytes, 48),
485            sh_entsize: read_u64(bytes, 56),
486        }
487    }
488
489    fn parse_32(bytes: &[u8]) -> Self {
490        // Elf32_Shdr field order matches Elf64_Shdr; only widths differ.
491        debug_assert!(bytes.len() >= SHDR32_SIZE as usize);
492        Self {
493            sh_name: read_u32(bytes, 0),
494            sh_type: read_u32(bytes, 4),
495            sh_flags: u64::from(read_u32(bytes, 8)),
496            sh_addr: u64::from(read_u32(bytes, 12)),
497            sh_offset: u64::from(read_u32(bytes, 16)),
498            sh_size: u64::from(read_u32(bytes, 20)),
499            sh_link: read_u32(bytes, 24),
500            sh_info: read_u32(bytes, 28),
501            sh_addralign: u64::from(read_u32(bytes, 32)),
502            sh_entsize: u64::from(read_u32(bytes, 36)),
503        }
504    }
505
506    fn write(&self, class: ElfClass, out: &mut [u8]) {
507        match class {
508            ElfClass::Elf64 => self.write_64(out),
509            ElfClass::Elf32 => self.write_32(out),
510        }
511    }
512
513    fn write_64(&self, out: &mut [u8]) {
514        debug_assert!(out.len() >= SHDR64_SIZE as usize);
515        write_u32(out, 0, self.sh_name);
516        write_u32(out, 4, self.sh_type);
517        write_u64(out, 8, self.sh_flags);
518        write_u64(out, 16, self.sh_addr);
519        write_u64(out, 24, self.sh_offset);
520        write_u64(out, 32, self.sh_size);
521        write_u32(out, 40, self.sh_link);
522        write_u32(out, 44, self.sh_info);
523        write_u64(out, 48, self.sh_addralign);
524        write_u64(out, 56, self.sh_entsize);
525    }
526
527    fn write_32(&self, out: &mut [u8]) {
528        debug_assert!(out.len() >= SHDR32_SIZE as usize);
529        write_u32(out, 0, self.sh_name);
530        write_u32(out, 4, self.sh_type);
531        write_u32(out, 8, self.sh_flags as u32);
532        write_u32(out, 12, self.sh_addr as u32);
533        write_u32(out, 16, self.sh_offset as u32);
534        write_u32(out, 20, self.sh_size as u32);
535        write_u32(out, 24, self.sh_link);
536        write_u32(out, 28, self.sh_info);
537        write_u32(out, 32, self.sh_addralign as u32);
538        write_u32(out, 36, self.sh_entsize as u32);
539    }
540
541    fn occupies_file(&self) -> bool {
542        self.sh_type != SHT_NOBITS && self.sh_size > 0
543    }
544}
545
546/// A parsed ELF64 file in a form that round-trips byte-identically.
547///
548/// The structured fields (`ehdr`, `phdrs`, `shdrs`) are interpreted; the
549/// raw bytes inside sections and any interstitial padding are stored
550/// verbatim. On `write_to_vec`, the structured fields are reassembled and
551/// the verbatim bytes are dropped back in place at their original offsets.
552#[derive(Debug, Clone)]
553pub struct Elf64File {
554    /// On-disk header layout. Determines whether the headers
555    /// re-emit as 32-bit or 64-bit on serialisation.
556    pub class: ElfClass,
557
558    pub ehdr: Ehdr64,
559    pub phdrs: Vec<Phdr64>,
560    pub shdrs: Vec<Shdr64>,
561
562    /// Section file content, parallel to `shdrs`. Empty for NOBITS or
563    /// zero-size sections.
564    section_data: Vec<Vec<u8>>,
565
566    /// Bytes that fall in the gaps between structured regions (e.g.
567    /// alignment padding between sections). Stored as `(file_offset, bytes)`.
568    padding: Vec<(u64, Vec<u8>)>,
569
570    /// Total size of the file, in bytes.
571    file_size: u64,
572}
573
574/// Returns true if `bytes` start with the ELF magic.
575///
576/// This says nothing about class (32 vs 64) or endianness — a true return
577/// means *some* flavor of ELF, not necessarily one this crate supports.
578#[must_use]
579pub fn is_elf(bytes: &[u8]) -> bool {
580    bytes.len() >= 4 && bytes[..4] == ELFMAG
581}
582
583/// Returns true iff `bytes` are an ELF little-endian image of either
584/// class — the flavors [`Elf64File::parse`] handles. Callers that
585/// route by format (e.g. the CLI's round-trip pipeline) should gate
586/// on this and fall through to a byte-copy for unsupported variants
587/// so the round-trip contract still holds.
588#[must_use]
589pub fn is_elf64_le(bytes: &[u8]) -> bool {
590    bytes.len() >= 6
591        && bytes[..4] == ELFMAG
592        && (bytes[4] == ELFCLASS32 || bytes[4] == ELFCLASS64)
593        && bytes[5] == ELFDATA2LSB
594}
595
596impl Elf64File {
597    /// Parse an ELF LE file (either ELFCLASS32 or ELFCLASS64) into a
598    /// structure that round-trips byte-identically.
599    pub fn parse(bytes: &[u8]) -> Result<Self> {
600        let (ehdr, class) = Ehdr64::parse(bytes)?;
601
602        let phdrs = Self::parse_phdrs(class, bytes, &ehdr)?;
603        let (shdrs, section_data) = Self::parse_shdrs_and_sections(class, bytes, &ehdr)?;
604
605        let regions = build_regions(class, &ehdr, &shdrs)?;
606        let padding = compute_padding(bytes, &regions);
607
608        Ok(Self {
609            class,
610            ehdr,
611            phdrs,
612            shdrs,
613            section_data,
614            padding,
615            file_size: bytes.len() as u64,
616        })
617    }
618
619    fn parse_phdrs(class: ElfClass, bytes: &[u8], ehdr: &Ehdr64) -> Result<Vec<Phdr64>> {
620        let count = ehdr.e_phnum as usize;
621        if count == 0 {
622            return Ok(Vec::new());
623        }
624        let entry_size = phdr_size(class) as usize;
625        let total = count
626            .checked_mul(entry_size)
627            .ok_or_else(|| Error::RegionOverflow {
628                label: "program-header table".into(),
629                offset: ehdr.e_phoff,
630                size: count as u64 * entry_size as u64,
631            })?;
632        ensure_len(bytes, ehdr.e_phoff, total as u64)?;
633        let start = ehdr.e_phoff as usize;
634        let mut phdrs = Vec::with_capacity(count);
635        for i in 0..count {
636            let off = start + i * entry_size;
637            phdrs.push(Phdr64::parse(class, &bytes[off..off + entry_size]));
638        }
639        Ok(phdrs)
640    }
641
642    fn parse_shdrs_and_sections(
643        class: ElfClass,
644        bytes: &[u8],
645        ehdr: &Ehdr64,
646    ) -> Result<(Vec<Shdr64>, Vec<Vec<u8>>)> {
647        let count = ehdr.e_shnum as usize;
648        if count == 0 {
649            return Ok((Vec::new(), Vec::new()));
650        }
651        let entry_size = shdr_size(class) as usize;
652        let total = count
653            .checked_mul(entry_size)
654            .ok_or_else(|| Error::RegionOverflow {
655                label: "section-header table".into(),
656                offset: ehdr.e_shoff,
657                size: count as u64 * entry_size as u64,
658            })?;
659        ensure_len(bytes, ehdr.e_shoff, total as u64)?;
660        let start = ehdr.e_shoff as usize;
661
662        let mut shdrs = Vec::with_capacity(count);
663        let mut section_data = Vec::with_capacity(count);
664        for i in 0..count {
665            let off = start + i * entry_size;
666            let sh = Shdr64::parse(class, &bytes[off..off + entry_size]);
667            if sh.occupies_file() {
668                ensure_len(bytes, sh.sh_offset, sh.sh_size)?;
669                let data_off = sh.sh_offset as usize;
670                let data_end = data_off + sh.sh_size as usize;
671                section_data.push(bytes[data_off..data_end].to_vec());
672            } else {
673                section_data.push(Vec::new());
674            }
675            shdrs.push(sh);
676        }
677        Ok((shdrs, section_data))
678    }
679
680    /// Raw on-disk bytes of the section at index `idx`, parallel to
681    /// [`Self::shdrs`]. Returns an empty slice for NOBITS or zero-size
682    /// sections. Returns `None` only for an out-of-range index.
683    #[must_use]
684    pub fn section_data(&self, idx: usize) -> Option<&[u8]> {
685        self.section_data.get(idx).map(Vec::as_slice)
686    }
687
688    /// Construct an [`Elf64File`] from already-parsed parts.
689    ///
690    /// Used by reconstructive code paths (such as `ud-compile`'s lower
691    /// path) that build the file's structure from a `.ud` AST rather
692    /// than from on-disk bytes. The caller is responsible for keeping
693    /// the parts consistent: `section_data` must be parallel to
694    /// `shdrs`, `padding` must cover every gap between structured
695    /// regions, and `file_size` must equal the total covered.
696    /// [`write_to_vec`](Self::write_to_vec) does no validation; it
697    /// assumes consistency.
698    #[must_use]
699    pub fn from_parts(
700        class: ElfClass,
701        ehdr: Ehdr64,
702        phdrs: Vec<Phdr64>,
703        shdrs: Vec<Shdr64>,
704        section_data: Vec<Vec<u8>>,
705        padding: Vec<(u64, Vec<u8>)>,
706        file_size: u64,
707    ) -> Self {
708        Self {
709            class,
710            ehdr,
711            phdrs,
712            shdrs,
713            section_data,
714            padding,
715            file_size,
716        }
717    }
718
719    /// Total size of the underlying file in bytes.
720    #[must_use]
721    pub fn file_size(&self) -> u64 {
722        self.file_size
723    }
724
725    /// All padding regions captured between structured regions.
726    /// Returns `(file_offset, bytes)` pairs in offset order.
727    #[must_use]
728    pub fn padding(&self) -> &[(u64, Vec<u8>)] {
729        &self.padding
730    }
731
732    /// Iterator over `(index, &Shdr64, &[u8])` for every section.
733    pub fn sections(&self) -> impl Iterator<Item = (usize, &Shdr64, &[u8])> {
734        self.shdrs
735            .iter()
736            .zip(&self.section_data)
737            .enumerate()
738            .map(|(i, (sh, data))| (i, sh, data.as_slice()))
739    }
740
741    /// Resolve the section's name through the section-header string
742    /// table (`.shstrtab`, indexed by `e_shstrndx`).
743    ///
744    /// Returns `None` if the section index is out of range, the
745    /// `e_shstrndx` points outside the section table, the name offset
746    /// is past the end of `.shstrtab`, or the bytes aren't valid UTF-8
747    /// (which would indicate a malformed or non-standard ELF; real
748    /// toolchains write ASCII section names).
749    #[must_use]
750    pub fn section_name(&self, idx: usize) -> Option<&str> {
751        let shstrtab = self.section_data(self.ehdr.e_shstrndx as usize)?;
752        let sh = self.shdrs.get(idx)?;
753        let start = sh.sh_name as usize;
754        let tail = shstrtab.get(start..)?;
755        let nul = tail.iter().position(|&b| b == 0)?;
756        std::str::from_utf8(&tail[..nul]).ok()
757    }
758
759    /// Find the first section with the given name.
760    ///
761    /// Iterates section headers in order, so for ELFs with multiple
762    /// sections sharing a name (rare but legal) the lowest-indexed one
763    /// wins.
764    #[must_use]
765    pub fn section_by_name(&self, name: &str) -> Option<(usize, &Shdr64, &[u8])> {
766        for (i, sh, data) in self.sections() {
767            if self.section_name(i) == Some(name) {
768                return Some((i, sh, data));
769            }
770        }
771        None
772    }
773
774    /// Serialize the parsed file back to bytes. For any input parsed from
775    /// real bytes, the output is byte-identical to the input.
776    #[must_use]
777    pub fn write_to_vec(&self) -> Vec<u8> {
778        let mut out = vec![0u8; self.file_size as usize];
779
780        let class = self.class;
781        self.ehdr
782            .write(class, &mut out[..ehdr_size(class) as usize]);
783
784        if !self.phdrs.is_empty() {
785            let start = self.ehdr.e_phoff as usize;
786            let entry_size = phdr_size(class) as usize;
787            for (i, ph) in self.phdrs.iter().enumerate() {
788                let off = start + i * entry_size;
789                ph.write(class, &mut out[off..off + entry_size]);
790            }
791        }
792
793        if !self.shdrs.is_empty() {
794            let start = self.ehdr.e_shoff as usize;
795            let entry_size = shdr_size(class) as usize;
796            for (i, sh) in self.shdrs.iter().enumerate() {
797                let off = start + i * entry_size;
798                sh.write(class, &mut out[off..off + entry_size]);
799            }
800        }
801
802        for (sh, data) in self.shdrs.iter().zip(&self.section_data) {
803            if sh.occupies_file() {
804                let off = sh.sh_offset as usize;
805                out[off..off + data.len()].copy_from_slice(data);
806            }
807        }
808
809        for (offset, bytes) in &self.padding {
810            let off = *offset as usize;
811            out[off..off + bytes.len()].copy_from_slice(bytes);
812        }
813
814        out
815    }
816}
817
/// A span of the file that the parser interprets rather than storing as
/// opaque padding.
#[derive(Clone, Debug)]
struct Region {
    /// Human-readable name used in error messages.
    label: String,
    /// Half-open byte range the region occupies in the file.
    range: Range<u64>,
}
824
825fn build_regions(class: ElfClass, ehdr: &Ehdr64, shdrs: &[Shdr64]) -> Result<Vec<Region>> {
826    let mut regions = Vec::new();
827
828    regions.push(Region {
829        label: "ELF header".into(),
830        range: 0..u64::from(ehdr_size(class)),
831    });
832
833    if ehdr.e_phnum > 0 {
834        let size = u64::from(ehdr.e_phnum) * u64::from(phdr_size(class));
835        let end = ehdr
836            .e_phoff
837            .checked_add(size)
838            .ok_or_else(|| Error::RegionOverflow {
839                label: "program-header table".into(),
840                offset: ehdr.e_phoff,
841                size,
842            })?;
843        regions.push(Region {
844            label: "program-header table".into(),
845            range: ehdr.e_phoff..end,
846        });
847    }
848
849    if ehdr.e_shnum > 0 {
850        let size = u64::from(ehdr.e_shnum) * u64::from(shdr_size(class));
851        let end = ehdr
852            .e_shoff
853            .checked_add(size)
854            .ok_or_else(|| Error::RegionOverflow {
855                label: "section-header table".into(),
856                offset: ehdr.e_shoff,
857                size,
858            })?;
859        regions.push(Region {
860            label: "section-header table".into(),
861            range: ehdr.e_shoff..end,
862        });
863    }
864
865    for (i, sh) in shdrs.iter().enumerate() {
866        if !sh.occupies_file() {
867            continue;
868        }
869        let end = sh
870            .sh_offset
871            .checked_add(sh.sh_size)
872            .ok_or_else(|| Error::RegionOverflow {
873                label: format!("section #{i}"),
874                offset: sh.sh_offset,
875                size: sh.sh_size,
876            })?;
877        regions.push(Region {
878            label: format!("section #{i}"),
879            range: sh.sh_offset..end,
880        });
881    }
882
883    regions.sort_by_key(|r| r.range.start);
884
885    for pair in regions.windows(2) {
886        let a = &pair[0];
887        let b = &pair[1];
888        if a.range.end > b.range.start {
889            return Err(Error::OverlappingRegions {
890                a_label: a.label.clone(),
891                a_start: a.range.start,
892                a_end: a.range.end,
893                b_label: b.label.clone(),
894                b_start: b.range.start,
895                b_end: b.range.end,
896            });
897        }
898    }
899
900    Ok(regions)
901}
902
903fn compute_padding(bytes: &[u8], regions: &[Region]) -> Vec<(u64, Vec<u8>)> {
904    let mut padding = Vec::new();
905    let file_end = bytes.len() as u64;
906    let mut cursor = 0u64;
907    for region in regions {
908        if region.range.start > cursor {
909            let start = cursor as usize;
910            let end = region.range.start as usize;
911            padding.push((cursor, bytes[start..end].to_vec()));
912        }
913        cursor = cursor.max(region.range.end);
914    }
915    if cursor < file_end {
916        let start = cursor as usize;
917        let end = file_end as usize;
918        padding.push((cursor, bytes[start..end].to_vec()));
919    }
920    padding
921}
922
923fn ensure_len(bytes: &[u8], offset: u64, needed: u64) -> Result<()> {
924    let have = bytes.len() as u64;
925    let end = offset.checked_add(needed).ok_or(Error::Truncated {
926        offset,
927        needed,
928        have,
929    })?;
930    if end > have {
931        return Err(Error::Truncated {
932            offset,
933            needed,
934            have,
935        });
936    }
937    Ok(())
938}
939
/// Decodes the little-endian `u16` stored at byte offset `at` of `bytes`.
///
/// # Panics
///
/// Panics if `bytes` does not contain two bytes starting at `at`.
fn read_u16(bytes: &[u8], at: usize) -> u16 {
    let window = &bytes[at..at + 2];
    // The window is exactly two bytes long, so the conversion cannot fail.
    let raw: [u8; 2] = window.try_into().expect("slice was 2 bytes");
    u16::from_le_bytes(raw)
}
943
/// Decodes the little-endian `u32` stored at byte offset `at` of `bytes`.
///
/// # Panics
///
/// Panics if `bytes` does not contain four bytes starting at `at`.
fn read_u32(bytes: &[u8], at: usize) -> u32 {
    let window = &bytes[at..at + 4];
    // The window is exactly four bytes long, so the conversion cannot fail.
    let raw: [u8; 4] = window.try_into().expect("slice was 4 bytes");
    u32::from_le_bytes(raw)
}
947
/// Decodes the little-endian `u64` stored at byte offset `at` of `bytes`.
///
/// # Panics
///
/// Panics if `bytes` does not contain eight bytes starting at `at`.
fn read_u64(bytes: &[u8], at: usize) -> u64 {
    let window = &bytes[at..at + 8];
    // The window is exactly eight bytes long, so the conversion cannot fail.
    let raw: [u8; 8] = window.try_into().expect("slice was 8 bytes");
    u64::from_le_bytes(raw)
}
951
/// Stores `value` as two little-endian bytes at offset `at` of `bytes`.
///
/// # Panics
///
/// Panics if `bytes` does not have two bytes available starting at `at`;
/// nothing is written in that case.
fn write_u16(bytes: &mut [u8], at: usize, value: u16) {
    let slot = &mut bytes[at..at + 2];
    let encoded = value.to_le_bytes();
    slot.copy_from_slice(&encoded);
}
955
/// Stores `value` as four little-endian bytes at offset `at` of `bytes`.
///
/// # Panics
///
/// Panics if `bytes` does not have four bytes available starting at `at`;
/// nothing is written in that case.
fn write_u32(bytes: &mut [u8], at: usize, value: u32) {
    let slot = &mut bytes[at..at + 4];
    let encoded = value.to_le_bytes();
    slot.copy_from_slice(&encoded);
}
959
/// Stores `value` as eight little-endian bytes at offset `at` of `bytes`.
///
/// # Panics
///
/// Panics if `bytes` does not have eight bytes available starting at `at`;
/// nothing is written in that case.
fn write_u64(bytes: &mut [u8], at: usize, value: u64) {
    let slot = &mut bytes[at..at + 8];
    let encoded = value.to_le_bytes();
    slot.copy_from_slice(&encoded);
}
963
#[cfg(test)]
mod tests {
    use super::*;

    // Byte offsets, within an ELF64 header, of the fields these tests set.
    const OFF_EI_CLASS: usize = 4;
    const OFF_EI_DATA: usize = 5;
    const OFF_EI_VERSION: usize = 6;
    const OFF_E_VERSION: usize = 20;
    const OFF_E_PHOFF: usize = 32;
    const OFF_E_EHSIZE: usize = 52;
    const OFF_E_PHENTSIZE: usize = 54;
    const OFF_E_PHNUM: usize = 56;

    /// Overwrites `image` starting at `at` with the already-encoded `field`.
    fn patch(image: &mut [u8], at: usize, field: &[u8]) {
        image[at..at + field.len()].copy_from_slice(field);
    }

    /// Builds a header-only ELF64 little-endian image: valid magic, class,
    /// data encoding and version, `e_ehsize` filled in, everything else zero.
    fn minimal_ehdr_bytes() -> Vec<u8> {
        let mut image = vec![0u8; EHDR64_SIZE as usize];
        patch(&mut image, 0, &ELFMAG);
        image[OFF_EI_CLASS] = ELFCLASS64;
        image[OFF_EI_DATA] = ELFDATA2LSB;
        image[OFF_EI_VERSION] = 1; // EV_CURRENT
        // e_type (ET_NONE) and e_machine stay zero; e_version = 1.
        patch(&mut image, OFF_E_VERSION, &1u32.to_le_bytes());
        // e_ehsize = 64
        patch(&mut image, OFF_E_EHSIZE, &EHDR64_SIZE.to_le_bytes());
        // e_phnum = 0, e_shnum = 0 → e_phentsize/e_shentsize unchecked
        image
    }

    /// A corrupted magic byte must be reported as `BadMagic`.
    #[test]
    fn rejects_non_elf() {
        let mut image = minimal_ehdr_bytes();
        image[0] = 0xff;
        let failure = Elf64File::parse(&image).unwrap_err();
        assert!(matches!(failure, Error::BadMagic(_)));
    }

    /// A class byte that is neither ELFCLASS32 nor ELFCLASS64 is rejected.
    #[test]
    fn rejects_unknown_class() {
        let mut image = minimal_ehdr_bytes();
        image[OFF_EI_CLASS] = 7; // bogus class
        let failure = Elf64File::parse(&image).unwrap_err();
        assert!(matches!(failure, Error::UnsupportedClass(7)));
    }

    /// Big-endian images are rejected with the offending encoding value.
    #[test]
    fn rejects_big_endian() {
        let mut image = minimal_ehdr_bytes();
        image[OFF_EI_DATA] = 2; // ELFDATA2MSB
        let failure = Elf64File::parse(&image).unwrap_err();
        assert!(matches!(failure, Error::UnsupportedEncoding(2)));
    }

    /// A bare header parses, has no program/section headers, and
    /// serialises back to the exact input bytes.
    #[test]
    fn parses_minimal_ehdr_only() {
        let image = minimal_ehdr_bytes();
        let parsed = Elf64File::parse(&image).expect("minimal ehdr should parse");
        assert_eq!(parsed.ehdr.e_ehsize, EHDR64_SIZE);
        assert!(parsed.phdrs.is_empty());
        assert!(parsed.shdrs.is_empty());
        assert_eq!(parsed.write_to_vec(), image);
    }

    /// One program header is declared at offset 64, but the image ends at
    /// 64 bytes, so there is no room for it and parsing must report
    /// truncation.
    #[test]
    fn detects_truncation_in_phdrs() {
        let mut image = minimal_ehdr_bytes();
        patch(&mut image, OFF_E_PHNUM, &1u16.to_le_bytes()); // e_phnum = 1
        patch(&mut image, OFF_E_PHENTSIZE, &PHDR64_SIZE.to_le_bytes());
        patch(&mut image, OFF_E_PHOFF, &64u64.to_le_bytes()); // e_phoff = 64
        let failure = Elf64File::parse(&image).unwrap_err();
        assert!(matches!(failure, Error::Truncated { .. }));
    }
}