Skip to main content

ud_format/
pe.rs

1//! PE/COFF reader and writer with byte-identical round-trip.
2//!
3//! v0 scope: parse the structural skeleton (DOS header, PE
4//! signature, COFF file header, optional header, section header
5//! table) into typed fields, and capture every byte of the input
6//! file so [`PeFile::write_to_vec`] returns it back unchanged.
7//! Section *contents* and any data outside the structural skeleton
8//! (DOS stub, optional header body, certificate table, etc.) are
9//! preserved verbatim and not re-interpreted.
10//!
11//! The contract: for any supported input `bytes`,
12//! `PeFile::parse(bytes)?.write_to_vec() == bytes`.
13//!
14//! Down the road this crate will grow:
15//!
16//! * Structured optional-header fields and data-directory entries.
17//! * Editable section data with a write path that re-derives
18//!   PointerToRawData / SizeOfRawData on serialise.
19//! * Import-table parsing so the analysis crate can name PE call
20//!   sites the way ELF's `ud-analysis::plt` names PLT thunks.
21//!
22//! For now the parser exists to validate input is a real PE and
23//! expose section metadata for higher layers; the byte-identity
24//! comes from re-emitting the original buffer.
25
26#![allow(clippy::cast_possible_truncation)]
27
/// `e_magic` value of `IMAGE_DOS_HEADER`: ASCII "MZ".
pub const DOS_MAGIC: [u8; 2] = *b"MZ";

/// PE signature appearing at `IMAGE_DOS_HEADER::e_lfanew`: ASCII
/// "PE\0\0".
pub const PE_SIGNATURE: [u8; 4] = *b"PE\0\0";

/// `Machine` value for i386 (`IMAGE_FILE_MACHINE_I386`).
pub const IMAGE_FILE_MACHINE_I386: u16 = 0x014c;

/// `Machine` value for x86-64 (`IMAGE_FILE_MACHINE_AMD64`).
pub const IMAGE_FILE_MACHINE_AMD64: u16 = 0x8664;

/// `Machine` value for `AArch64` (`IMAGE_FILE_MACHINE_ARM64`).
pub const IMAGE_FILE_MACHINE_ARM64: u16 = 0xaa64;

/// On-disk size of `IMAGE_DOS_HEADER`, in bytes.
const DOS_HEADER_SIZE: usize = 64;

/// Offset of `e_lfanew` within `IMAGE_DOS_HEADER` (its last four
/// bytes).
const E_LFANEW_OFFSET: usize = 0x3c;

/// On-disk size of `IMAGE_FILE_HEADER` (the COFF header), in bytes.
const COFF_HEADER_SIZE: usize = 20;

/// On-disk size of an `IMAGE_SECTION_HEADER` entry, in bytes.
pub const SECTION_HEADER_SIZE: usize = 40;

/// On-disk size of one COFF symbol-table entry (main or aux), in
/// bytes.
pub const COFF_SYMBOL_SIZE: usize = 18;

/// `Type` field value marking a function symbol:
/// `IMAGE_SYM_DTYPE_FUNCTION` placed in the derived-type bits that
/// sit directly above the low nibble (hence `0x20`).
pub const COFF_DTYPE_FUNCTION: u16 = 0x20;

/// `StorageClass`: external (`IMAGE_SYM_CLASS_EXTERNAL`).
pub const COFF_SYM_CLASS_EXTERNAL: u8 = 2;

/// `StorageClass`: static (`IMAGE_SYM_CLASS_STATIC`).
pub const COFF_SYM_CLASS_STATIC: u8 = 3;

/// `Magic` value at the start of `IMAGE_OPTIONAL_HEADER` for PE32
/// (32-bit images).
pub const OPTIONAL_HEADER_MAGIC_PE32: u16 = 0x010b;

/// `Magic` value at the start of `IMAGE_OPTIONAL_HEADER64` for PE32+
/// (64-bit images).
pub const OPTIONAL_HEADER_MAGIC_PE32_PLUS: u16 = 0x020b;
75
/// Errors surfaced when parsing or writing a PE file.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// The input ends before a required region: `needed` bytes were
    /// expected starting at `offset`, but the file is only `have`
    /// bytes long.
    #[error("file too short: needed {needed} bytes at offset {offset}, have {have}")]
    Truncated { offset: u64, needed: u64, have: u64 },

    /// The first two bytes of the file are not [`DOS_MAGIC`]; carries
    /// the bytes that were found instead.
    #[error("not a PE file: bad DOS magic {0:02x?}")]
    BadDosMagic([u8; 2]),

    /// `e_lfanew` from the DOS header is larger than the file size.
    #[error("`e_lfanew` 0x{e_lfanew:x} points outside the file (size {file_size})")]
    LfanewOutOfRange { e_lfanew: u32, file_size: u64 },

    /// The four bytes at `e_lfanew` are not [`PE_SIGNATURE`]; carries
    /// the bytes that were found instead.
    #[error("not a PE file: PE signature is {0:02x?}")]
    BadPeSignature([u8; 4]),

    /// The optional header is present but its `Magic` word is neither
    /// [`OPTIONAL_HEADER_MAGIC_PE32`] nor
    /// [`OPTIONAL_HEADER_MAGIC_PE32_PLUS`].
    #[error("optional-header magic 0x{0:04x} is neither PE32 (0x10b) nor PE32+ (0x20b)")]
    UnsupportedOptionalMagic(u16),

    /// Arithmetic overflow while computing where the region named
    /// `label` (starting at `offset`, `size` bytes long) would end.
    #[error("integer overflow computing region end for {label} at offset {offset} size {size}")]
    RegionOverflow {
        label: String,
        offset: u64,
        size: u64,
    },
}
101
/// `Result` alias whose error type defaults to this module's [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
103
/// PE32 vs PE32+ — the optional header's structural variant, as
/// selected by the optional header's `Magic` word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PeKind {
    /// PE32 (32-bit image), magic [`OPTIONAL_HEADER_MAGIC_PE32`].
    Pe32,
    /// PE32+ (64-bit image), magic
    /// [`OPTIONAL_HEADER_MAGIC_PE32_PLUS`].
    Pe32Plus,
}
112
/// Parsed `IMAGE_DOS_HEADER` (the 64-byte prefix every PE file
/// starts with). The fields that aren't meaningful for modern
/// PE files (the original 16-bit DOS layout descriptors) round
/// through verbatim — typical values are `e_cblp = 0x90`,
/// `e_cparhdr = 0x4`, `e_minalloc = 0`, `e_maxalloc = 0xffff`,
/// `e_sp = 0xb8`, with reserved fields zero. The two fields
/// that matter for the modern format are `e_magic` (`"MZ"`)
/// and `e_lfanew` (file offset of the PE signature).
///
/// Fields are declared in on-disk order; all multi-byte values are
/// little-endian on disk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DosHeader {
    /// Offset 0x00: signature bytes, `"MZ"` for a valid file.
    pub e_magic: [u8; 2],
    // Offsets 0x02..0x1c: legacy 16-bit DOS descriptors, one `u16`
    // each, stored back to back in declaration order.
    pub e_cblp: u16,
    pub e_cp: u16,
    pub e_crlc: u16,
    pub e_cparhdr: u16,
    pub e_minalloc: u16,
    pub e_maxalloc: u16,
    pub e_ss: u16,
    pub e_sp: u16,
    pub e_csum: u16,
    pub e_ip: u16,
    pub e_cs: u16,
    pub e_lfarlc: u16,
    pub e_ovno: u16,
    /// Offset 0x1c: four reserved words.
    pub e_res: [u16; 4],
    /// Offset 0x24.
    pub e_oemid: u16,
    /// Offset 0x26.
    pub e_oeminfo: u16,
    /// Offset 0x28: ten reserved words.
    pub e_res2: [u16; 10],
    /// Offset 0x3c: file offset of the PE signature.
    pub e_lfanew: u32,
}
143
144impl DosHeader {
145    fn parse(bytes: &[u8]) -> Self {
146        let mut h = DosHeader {
147            e_magic: [bytes[0], bytes[1]],
148            e_cblp: read_u16(bytes, 2),
149            e_cp: read_u16(bytes, 4),
150            e_crlc: read_u16(bytes, 6),
151            e_cparhdr: read_u16(bytes, 8),
152            e_minalloc: read_u16(bytes, 10),
153            e_maxalloc: read_u16(bytes, 12),
154            e_ss: read_u16(bytes, 14),
155            e_sp: read_u16(bytes, 16),
156            e_csum: read_u16(bytes, 18),
157            e_ip: read_u16(bytes, 20),
158            e_cs: read_u16(bytes, 22),
159            e_lfarlc: read_u16(bytes, 24),
160            e_ovno: read_u16(bytes, 26),
161            e_res: [0; 4],
162            e_oemid: read_u16(bytes, 36),
163            e_oeminfo: read_u16(bytes, 38),
164            e_res2: [0; 10],
165            e_lfanew: read_u32(bytes, E_LFANEW_OFFSET),
166        };
167        for i in 0..4 {
168            h.e_res[i] = read_u16(bytes, 28 + 2 * i);
169        }
170        for i in 0..10 {
171            h.e_res2[i] = read_u16(bytes, 40 + 2 * i);
172        }
173        h
174    }
175
176    /// Encode the 64-byte DOS header.
177    #[must_use]
178    pub fn encode(&self) -> [u8; 64] {
179        let mut out = [0u8; 64];
180        out[0..2].copy_from_slice(&self.e_magic);
181        out[2..4].copy_from_slice(&self.e_cblp.to_le_bytes());
182        out[4..6].copy_from_slice(&self.e_cp.to_le_bytes());
183        out[6..8].copy_from_slice(&self.e_crlc.to_le_bytes());
184        out[8..10].copy_from_slice(&self.e_cparhdr.to_le_bytes());
185        out[10..12].copy_from_slice(&self.e_minalloc.to_le_bytes());
186        out[12..14].copy_from_slice(&self.e_maxalloc.to_le_bytes());
187        out[14..16].copy_from_slice(&self.e_ss.to_le_bytes());
188        out[16..18].copy_from_slice(&self.e_sp.to_le_bytes());
189        out[18..20].copy_from_slice(&self.e_csum.to_le_bytes());
190        out[20..22].copy_from_slice(&self.e_ip.to_le_bytes());
191        out[22..24].copy_from_slice(&self.e_cs.to_le_bytes());
192        out[24..26].copy_from_slice(&self.e_lfarlc.to_le_bytes());
193        out[26..28].copy_from_slice(&self.e_ovno.to_le_bytes());
194        for i in 0..4 {
195            out[28 + 2 * i..30 + 2 * i].copy_from_slice(&self.e_res[i].to_le_bytes());
196        }
197        out[36..38].copy_from_slice(&self.e_oemid.to_le_bytes());
198        out[38..40].copy_from_slice(&self.e_oeminfo.to_le_bytes());
199        for i in 0..10 {
200            out[40 + 2 * i..42 + 2 * i].copy_from_slice(&self.e_res2[i].to_le_bytes());
201        }
202        out[E_LFANEW_OFFSET..E_LFANEW_OFFSET + 4].copy_from_slice(&self.e_lfanew.to_le_bytes());
203        out
204    }
205}
206
/// Parsed `IMAGE_OPTIONAL_HEADER` / `IMAGE_OPTIONAL_HEADER64`.
/// One struct handles both PE32 and PE32+ variants; the
/// 32-bit ImageBase / stack / heap sizes are stored as `u64`
/// for uniformity and zero-extended on read.
///
/// The data directories at the tail of the optional header
/// aren't stored here — see [`PeFile::data_directories`]. The
/// `number_of_rva_and_sizes` field tells the encoder how many
/// directory slots to emit.
///
/// Fields are declared in on-disk order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OptionalHeader {
    /// [`OPTIONAL_HEADER_MAGIC_PE32`] or
    /// [`OPTIONAL_HEADER_MAGIC_PE32_PLUS`]; selects the on-disk
    /// layout used when encoding.
    pub magic: u16,
    pub major_linker_version: u8,
    pub minor_linker_version: u8,
    pub size_of_code: u32,
    pub size_of_initialized_data: u32,
    pub size_of_uninitialized_data: u32,
    pub address_of_entry_point: u32,
    pub base_of_code: u32,
    /// PE32 only — the address of the data section. Always 0
    /// in PE32+ since 64-bit images don't have this field.
    pub base_of_data: u32,
    /// 4 bytes on disk for PE32 (zero-extended here), 8 bytes for
    /// PE32+.
    pub image_base: u64,
    pub section_alignment: u32,
    pub file_alignment: u32,
    pub major_operating_system_version: u16,
    pub minor_operating_system_version: u16,
    pub major_image_version: u16,
    pub minor_image_version: u16,
    pub major_subsystem_version: u16,
    pub minor_subsystem_version: u16,
    pub win32_version_value: u32,
    pub size_of_image: u32,
    pub size_of_headers: u32,
    pub check_sum: u32,
    pub subsystem: u16,
    pub dll_characteristics: u16,
    // The four stack/heap sizes are 4 bytes each on disk for PE32
    // (zero-extended here) and 8 bytes each for PE32+.
    pub size_of_stack_reserve: u64,
    pub size_of_stack_commit: u64,
    pub size_of_heap_reserve: u64,
    pub size_of_heap_commit: u64,
    pub loader_flags: u32,
    /// Number of data-directory slots that follow the fixed part of
    /// the header.
    pub number_of_rva_and_sizes: u32,
}
251
252impl OptionalHeader {
253    /// Parse from `bytes` (whose length is `coff.size_of_optional_header`).
254    /// Returns `None` when the buffer is too short or the magic is
255    /// neither PE32 nor PE32+; the parser falls back to a default-zero
256    /// header in those cases.
257    fn parse(bytes: &[u8]) -> Option<Self> {
258        if bytes.len() < 2 {
259            return Option::None;
260        }
261        let magic = read_u16(bytes, 0);
262        match magic {
263            OPTIONAL_HEADER_MAGIC_PE32 => Self::parse_pe32(bytes, magic),
264            OPTIONAL_HEADER_MAGIC_PE32_PLUS => Self::parse_pe32_plus(bytes, magic),
265            _ => Option::None,
266        }
267    }
268
269    fn parse_pe32(bytes: &[u8], magic: u16) -> Option<Self> {
270        if bytes.len() < 96 {
271            return Option::None;
272        }
273        Some(Self {
274            magic,
275            major_linker_version: bytes[2],
276            minor_linker_version: bytes[3],
277            size_of_code: read_u32(bytes, 4),
278            size_of_initialized_data: read_u32(bytes, 8),
279            size_of_uninitialized_data: read_u32(bytes, 12),
280            address_of_entry_point: read_u32(bytes, 16),
281            base_of_code: read_u32(bytes, 20),
282            base_of_data: read_u32(bytes, 24),
283            image_base: u64::from(read_u32(bytes, 28)),
284            section_alignment: read_u32(bytes, 32),
285            file_alignment: read_u32(bytes, 36),
286            major_operating_system_version: read_u16(bytes, 40),
287            minor_operating_system_version: read_u16(bytes, 42),
288            major_image_version: read_u16(bytes, 44),
289            minor_image_version: read_u16(bytes, 46),
290            major_subsystem_version: read_u16(bytes, 48),
291            minor_subsystem_version: read_u16(bytes, 50),
292            win32_version_value: read_u32(bytes, 52),
293            size_of_image: read_u32(bytes, 56),
294            size_of_headers: read_u32(bytes, 60),
295            check_sum: read_u32(bytes, 64),
296            subsystem: read_u16(bytes, 68),
297            dll_characteristics: read_u16(bytes, 70),
298            size_of_stack_reserve: u64::from(read_u32(bytes, 72)),
299            size_of_stack_commit: u64::from(read_u32(bytes, 76)),
300            size_of_heap_reserve: u64::from(read_u32(bytes, 80)),
301            size_of_heap_commit: u64::from(read_u32(bytes, 84)),
302            loader_flags: read_u32(bytes, 88),
303            number_of_rva_and_sizes: read_u32(bytes, 92),
304        })
305    }
306
307    fn parse_pe32_plus(bytes: &[u8], magic: u16) -> Option<Self> {
308        if bytes.len() < 112 {
309            return Option::None;
310        }
311        Some(Self {
312            magic,
313            major_linker_version: bytes[2],
314            minor_linker_version: bytes[3],
315            size_of_code: read_u32(bytes, 4),
316            size_of_initialized_data: read_u32(bytes, 8),
317            size_of_uninitialized_data: read_u32(bytes, 12),
318            address_of_entry_point: read_u32(bytes, 16),
319            base_of_code: read_u32(bytes, 20),
320            base_of_data: 0,
321            image_base: read_u64(bytes, 24),
322            section_alignment: read_u32(bytes, 32),
323            file_alignment: read_u32(bytes, 36),
324            major_operating_system_version: read_u16(bytes, 40),
325            minor_operating_system_version: read_u16(bytes, 42),
326            major_image_version: read_u16(bytes, 44),
327            minor_image_version: read_u16(bytes, 46),
328            major_subsystem_version: read_u16(bytes, 48),
329            minor_subsystem_version: read_u16(bytes, 50),
330            win32_version_value: read_u32(bytes, 52),
331            size_of_image: read_u32(bytes, 56),
332            size_of_headers: read_u32(bytes, 60),
333            check_sum: read_u32(bytes, 64),
334            subsystem: read_u16(bytes, 68),
335            dll_characteristics: read_u16(bytes, 70),
336            size_of_stack_reserve: read_u64(bytes, 72),
337            size_of_stack_commit: read_u64(bytes, 80),
338            size_of_heap_reserve: read_u64(bytes, 88),
339            size_of_heap_commit: read_u64(bytes, 96),
340            loader_flags: read_u32(bytes, 104),
341            number_of_rva_and_sizes: read_u32(bytes, 108),
342        })
343    }
344
345    /// Encode the optional header (without trailing data
346    /// directories) into the buffer at offset 0. Returns the
347    /// number of bytes written (96 for PE32, 112 for PE32+).
348    /// The caller appends the data-directory entries after.
349    #[must_use]
350    pub fn encode(&self) -> Vec<u8> {
351        let mut out = Vec::with_capacity(112);
352        out.extend_from_slice(&self.magic.to_le_bytes());
353        out.push(self.major_linker_version);
354        out.push(self.minor_linker_version);
355        out.extend_from_slice(&self.size_of_code.to_le_bytes());
356        out.extend_from_slice(&self.size_of_initialized_data.to_le_bytes());
357        out.extend_from_slice(&self.size_of_uninitialized_data.to_le_bytes());
358        out.extend_from_slice(&self.address_of_entry_point.to_le_bytes());
359        out.extend_from_slice(&self.base_of_code.to_le_bytes());
360        match self.magic {
361            OPTIONAL_HEADER_MAGIC_PE32 => {
362                out.extend_from_slice(&self.base_of_data.to_le_bytes());
363                out.extend_from_slice(&(self.image_base as u32).to_le_bytes());
364            }
365            _ => {
366                // OPTIONAL_HEADER_MAGIC_PE32_PLUS (and any other
367                // future magic) — 64-bit `image_base`.
368                out.extend_from_slice(&self.image_base.to_le_bytes());
369            }
370        }
371        out.extend_from_slice(&self.section_alignment.to_le_bytes());
372        out.extend_from_slice(&self.file_alignment.to_le_bytes());
373        out.extend_from_slice(&self.major_operating_system_version.to_le_bytes());
374        out.extend_from_slice(&self.minor_operating_system_version.to_le_bytes());
375        out.extend_from_slice(&self.major_image_version.to_le_bytes());
376        out.extend_from_slice(&self.minor_image_version.to_le_bytes());
377        out.extend_from_slice(&self.major_subsystem_version.to_le_bytes());
378        out.extend_from_slice(&self.minor_subsystem_version.to_le_bytes());
379        out.extend_from_slice(&self.win32_version_value.to_le_bytes());
380        out.extend_from_slice(&self.size_of_image.to_le_bytes());
381        out.extend_from_slice(&self.size_of_headers.to_le_bytes());
382        out.extend_from_slice(&self.check_sum.to_le_bytes());
383        out.extend_from_slice(&self.subsystem.to_le_bytes());
384        out.extend_from_slice(&self.dll_characteristics.to_le_bytes());
385        if self.magic == OPTIONAL_HEADER_MAGIC_PE32 {
386            out.extend_from_slice(&(self.size_of_stack_reserve as u32).to_le_bytes());
387            out.extend_from_slice(&(self.size_of_stack_commit as u32).to_le_bytes());
388            out.extend_from_slice(&(self.size_of_heap_reserve as u32).to_le_bytes());
389            out.extend_from_slice(&(self.size_of_heap_commit as u32).to_le_bytes());
390        } else {
391            out.extend_from_slice(&self.size_of_stack_reserve.to_le_bytes());
392            out.extend_from_slice(&self.size_of_stack_commit.to_le_bytes());
393            out.extend_from_slice(&self.size_of_heap_reserve.to_le_bytes());
394            out.extend_from_slice(&self.size_of_heap_commit.to_le_bytes());
395        }
396        out.extend_from_slice(&self.loader_flags.to_le_bytes());
397        out.extend_from_slice(&self.number_of_rva_and_sizes.to_le_bytes());
398        out
399    }
400}
401
/// Parsed `IMAGE_FILE_HEADER` (a.k.a. COFF header).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CoffHeader {
    /// Target machine, e.g. [`IMAGE_FILE_MACHINE_AMD64`].
    pub machine: u16,
    /// Number of entries in the section header table.
    pub number_of_sections: u16,
    pub time_date_stamp: u32,
    /// File offset of the COFF symbol table; 0 when there is none.
    pub pointer_to_symbol_table: u32,
    /// Number of symbol-table records, aux records included.
    pub number_of_symbols: u32,
    /// Size in bytes of the optional header that follows this
    /// header; 0 when there is no optional header.
    pub size_of_optional_header: u16,
    pub characteristics: u16,
}
413
/// One main COFF symbol-table entry, with its name resolved through
/// the string table when needed. Aux records are skipped on iteration
/// (their `aux_count` field on the preceding main symbol governs
/// how many to skip).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CoffSymbol {
    /// The symbol's name. For "long" names (those whose first 4
    /// bytes are zero), the trailing 4 bytes index into the COFF
    /// string table; we resolve the indirection here. Empty when
    /// the indirection points outside the string table.
    pub name: String,
    /// Value associated with the symbol. For function symbols
    /// defined in a section, this is the offset within the section.
    pub value: u32,
    /// 1-indexed section number; 0 for undefined, -1 for
    /// `IMAGE_SYM_ABSOLUTE`, -2 for `IMAGE_SYM_DEBUG`.
    pub section_number: i16,
    /// Raw `Type` field: base type in the low bits, derived type in
    /// the bits above the low nibble. [`CoffSymbol::is_function`]
    /// compares those derived-type bits with
    /// [`COFF_DTYPE_FUNCTION`].
    pub type_: u16,
    /// `IMAGE_SYM_CLASS_*` value.
    pub storage_class: u8,
    /// Number of trailing aux records belonging to this symbol.
    pub aux_count: u8,
}
439
440impl CoffSymbol {
441    /// True when this symbol's `Type` field marks it a function.
442    #[must_use]
443    pub fn is_function(&self) -> bool {
444        (self.type_ & 0xf0) == COFF_DTYPE_FUNCTION
445    }
446}
447
/// Parsed `IMAGE_SECTION_HEADER`.
///
/// `name` is the raw 8-byte field; for "long" names that start with
/// `'/'` followed by a decimal offset into the COFF string table,
/// callers are responsible for resolving via the symbol table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionHeader {
    /// Raw, NUL-padded name bytes.
    pub name: [u8; 8],
    /// Size of the section once mapped into memory.
    pub virtual_size: u32,
    /// RVA at which the section starts.
    pub virtual_address: u32,
    /// Number of bytes the section occupies in the file; 0 for
    /// sections with no on-disk contents.
    pub size_of_raw_data: u32,
    /// File offset of the section's on-disk contents.
    pub pointer_to_raw_data: u32,
    pub pointer_to_relocations: u32,
    pub pointer_to_linenumbers: u32,
    pub number_of_relocations: u16,
    pub number_of_linenumbers: u16,
    pub characteristics: u32,
}
466
/// A parsed PE file. The structured fields are read-only views; the
/// authoritative bytes live in the private `raw` buffer and are what
/// [`write_to_vec`] returns. Future iterations will replace this with
/// a re-derive-on-write path; for v0 the round-trip is guaranteed
/// trivially because we don't mutate the buffer.
///
/// [`write_to_vec`]: PeFile::write_to_vec
#[derive(Debug, Clone)]
pub struct PeFile {
    /// Optional-header magic (PE32 vs PE32+). Set to
    /// [`PeKind::Pe32Plus`] when the file has no optional header;
    /// the value is informational only in that case.
    pub kind: PeKind,
    /// File offset of the PE signature (same value as
    /// `dos.e_lfanew`, surfaced separately for convenience).
    pub e_lfanew: u32,
    /// Parsed DOS header. The fields that don't matter for
    /// modern PE files (the original 16-bit DOS descriptors)
    /// round through verbatim.
    pub dos: DosHeader,
    /// The DOS stub program — bytes between the end of the DOS
    /// header (offset 64) and `e_lfanew`. Treated as opaque;
    /// the loader doesn't execute this in 32/64-bit OS, but
    /// most linkers ship the canonical "This program cannot be
    /// run in DOS mode" stub. We preserve whatever bytes are
    /// there. Empty when `e_lfanew` is 64 or less.
    pub dos_stub: Vec<u8>,
    /// COFF header values.
    pub coff: CoffHeader,
    /// Optional header values. `None` for object files (which
    /// have no optional header — `coff.size_of_optional_header`
    /// is 0), and also when the declared optional header is too
    /// short to hold the fixed fields of its variant.
    pub optional: Option<OptionalHeader>,
    /// `ImageBase` from the optional header — the run-time virtual
    /// address the loader maps the file to. Section RVAs are added
    /// to this to form full VAs at run time. Zero when the file has
    /// no optional header (object files).
    pub image_base: u64,
    /// `AddressOfEntryPoint` from the optional header — the RVA the
    /// loader jumps to after mapping the image. For an executable
    /// this is `_start` / `mainCRTStartup`; for a DLL this is
    /// `DllMain`. Zero when the file has no optional header.
    pub address_of_entry_point: u32,
    /// Data directories from the optional header. Index 0 is the
    /// Export Table; index 1 the Import Table; etc. Standard PE
    /// reserves 16 entries; we parse `NumberOfRvaAndSizes` of them.
    /// Left empty when the optional header is too short to hold the
    /// number of entries it declares.
    pub data_directories: Vec<DataDirectory>,
    /// Section header table, in declaration order.
    pub sections: Vec<SectionHeader>,
    /// The complete file bytes; this is what `write_to_vec`
    /// returns, byte-for-byte.
    raw: Vec<u8>,
}
518
/// One (RVA, size) pair from the optional header's data directory
/// array. Both fields are zero when the entry is unused. Each entry
/// occupies 8 bytes on disk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DataDirectory {
    /// RVA of the table this entry describes.
    pub virtual_address: u32,
    /// Size of that table in bytes.
    pub size: u32,
}
526
/// Index of the Export Table entry in [`PeFile::data_directories`].
pub const DATA_DIR_EXPORT: usize = 0;

/// Index of the Import Table entry in [`PeFile::data_directories`].
pub const DATA_DIR_IMPORT: usize = 1;
532
533impl PeFile {
534    /// Parse a PE file. Validates the structural skeleton (DOS
535    /// header, PE signature, COFF + optional + section headers) but
536    /// leaves the rest as opaque bytes.
537    #[allow(clippy::too_many_lines)]
538    pub fn parse(bytes: &[u8]) -> Result<Self> {
539        if bytes.len() < DOS_HEADER_SIZE {
540            return Err(Error::Truncated {
541                offset: 0,
542                needed: DOS_HEADER_SIZE as u64,
543                have: bytes.len() as u64,
544            });
545        }
546        let mut dos_magic = [0u8; 2];
547        dos_magic.copy_from_slice(&bytes[..2]);
548        if dos_magic != DOS_MAGIC {
549            return Err(Error::BadDosMagic(dos_magic));
550        }
551
552        let dos = DosHeader::parse(&bytes[..DOS_HEADER_SIZE]);
553        let e_lfanew = dos.e_lfanew;
554        let stub_end = (e_lfanew as usize).min(bytes.len());
555        let dos_stub = if stub_end > DOS_HEADER_SIZE {
556            bytes[DOS_HEADER_SIZE..stub_end].to_vec()
557        } else {
558            Vec::new()
559        };
560        let pe_off = e_lfanew as usize;
561        if (pe_off as u64) > bytes.len() as u64 {
562            return Err(Error::LfanewOutOfRange {
563                e_lfanew,
564                file_size: bytes.len() as u64,
565            });
566        }
567        ensure_len(bytes, pe_off as u64, 4)?;
568        let mut sig = [0u8; 4];
569        sig.copy_from_slice(&bytes[pe_off..pe_off + 4]);
570        if sig != PE_SIGNATURE {
571            return Err(Error::BadPeSignature(sig));
572        }
573
574        let coff_off = pe_off + 4;
575        ensure_len(bytes, coff_off as u64, COFF_HEADER_SIZE as u64)?;
576        let coff = parse_coff_header(&bytes[coff_off..coff_off + COFF_HEADER_SIZE]);
577
578        let opt_off = coff_off + COFF_HEADER_SIZE;
579        let opt_size = coff.size_of_optional_header as usize;
580        ensure_len(bytes, opt_off as u64, opt_size as u64)?;
581        let mut image_base: u64 = 0;
582        let mut address_of_entry_point: u32 = 0;
583        let mut data_directories: Vec<DataDirectory> = Vec::new();
584        let optional = if opt_size > 0 {
585            OptionalHeader::parse(&bytes[opt_off..opt_off + opt_size])
586        } else {
587            Option::None
588        };
589        let kind = if opt_size == 0 {
590            // Object files have no optional header. Default to PE32+
591            // for typing purposes; the kind is informational only.
592            PeKind::Pe32Plus
593        } else {
594            ensure_len(bytes, opt_off as u64, 2)?;
595            let magic = read_u16(bytes, opt_off);
596            // AddressOfEntryPoint sits at offset 16 in both variants.
597            if opt_size >= 20 {
598                address_of_entry_point = read_u32(bytes, opt_off + 16);
599            }
600            let (variant, data_dir_off) = match magic {
601                OPTIONAL_HEADER_MAGIC_PE32 => {
602                    // PE32 ImageBase is at offset 28, 4 bytes.
603                    if opt_size >= 32 {
604                        image_base = u64::from(read_u32(bytes, opt_off + 28));
605                    }
606                    (PeKind::Pe32, 96usize)
607                }
608                OPTIONAL_HEADER_MAGIC_PE32_PLUS => {
609                    // PE32+ ImageBase is at offset 24, 8 bytes.
610                    if opt_size >= 32 {
611                        image_base = read_u64(bytes, opt_off + 24);
612                    }
613                    (PeKind::Pe32Plus, 112usize)
614                }
615                other => return Err(Error::UnsupportedOptionalMagic(other)),
616            };
617            // NumberOfRvaAndSizes lives at data_dir_off - 4 (right
618            // before the data directories table). Each entry is
619            // 8 bytes (RVA + size).
620            if opt_size >= data_dir_off {
621                let count_off = data_dir_off - 4;
622                let count = read_u32(bytes, opt_off + count_off) as usize;
623                let dirs_bytes_needed = count.saturating_mul(8);
624                if opt_size >= data_dir_off + dirs_bytes_needed {
625                    for i in 0..count {
626                        let off = opt_off + data_dir_off + i * 8;
627                        data_directories.push(DataDirectory {
628                            virtual_address: read_u32(bytes, off),
629                            size: read_u32(bytes, off + 4),
630                        });
631                    }
632                }
633            }
634            variant
635        };
636
637        let sec_off = opt_off + opt_size;
638        let sec_count = coff.number_of_sections as usize;
639        let sec_total =
640            sec_count
641                .checked_mul(SECTION_HEADER_SIZE)
642                .ok_or_else(|| Error::RegionOverflow {
643                    label: "section header table".into(),
644                    offset: sec_off as u64,
645                    size: sec_count as u64 * SECTION_HEADER_SIZE as u64,
646                })?;
647        ensure_len(bytes, sec_off as u64, sec_total as u64)?;
648        let mut sections = Vec::with_capacity(sec_count);
649        for i in 0..sec_count {
650            let off = sec_off + i * SECTION_HEADER_SIZE;
651            sections.push(parse_section_header(&bytes[off..off + SECTION_HEADER_SIZE]));
652        }
653
654        Ok(Self {
655            kind,
656            e_lfanew,
657            dos,
658            dos_stub,
659            coff,
660            optional,
661            image_base,
662            address_of_entry_point,
663            data_directories,
664            sections,
665            raw: bytes.to_vec(),
666        })
667    }
668
669    /// Total size of the parsed file in bytes.
670    #[must_use]
671    pub fn file_size(&self) -> u64 {
672        self.raw.len() as u64
673    }
674
675    /// Raw bytes of the entire file. Stable as long as `PeFile`
676    /// hasn't been mutated through a (currently nonexistent) edit
677    /// API.
678    #[must_use]
679    pub fn raw_bytes(&self) -> &[u8] {
680        &self.raw
681    }
682
683    /// Raw bytes of `sections[idx]`'s on-disk contents, or `None`
684    /// for an out-of-range index. Returns an empty slice when the
685    /// section's `SizeOfRawData` is zero (uninitialised data, e.g.
686    /// `.bss`).
687    #[must_use]
688    pub fn section_data(&self, idx: usize) -> Option<&[u8]> {
689        let sh = self.sections.get(idx)?;
690        let start = sh.pointer_to_raw_data as usize;
691        let size = sh.size_of_raw_data as usize;
692        if size == 0 {
693            return Some(&[]);
694        }
695        self.raw.get(start..start.checked_add(size)?)
696    }
697
698    /// Resolve a section header's "short" name as a UTF-8 string
699    /// trimmed to the first NUL. Long names (those starting with
700    /// `'/'` followed by a decimal offset) are returned verbatim;
701    /// the COFF string table that resolves them isn't yet parsed.
702    #[must_use]
703    pub fn section_name(&self, idx: usize) -> Option<&str> {
704        let sh = self.sections.get(idx)?;
705        let nul = sh
706            .name
707            .iter()
708            .position(|&b| b == 0)
709            .unwrap_or(sh.name.len());
710        std::str::from_utf8(&sh.name[..nul]).ok()
711    }
712
713    /// Iterate the COFF symbol table, skipping aux records.
714    ///
715    /// Returns an empty iterator when the file declares no symbol
716    /// table (`pointer_to_symbol_table == 0` or
717    /// `number_of_symbols == 0`) or when the table runs past the
718    /// end of the file.
719    #[must_use]
720    pub fn coff_symbols(&self) -> Vec<CoffSymbol> {
721        let sym_off = self.coff.pointer_to_symbol_table as usize;
722        let count = self.coff.number_of_symbols as usize;
723        if sym_off == 0 || count == 0 {
724            return Vec::new();
725        }
726        let table_size = count * COFF_SYMBOL_SIZE;
727        let Some(table_end) = sym_off.checked_add(table_size) else {
728            return Vec::new();
729        };
730        if table_end > self.raw.len() {
731            return Vec::new();
732        }
733        let table = &self.raw[sym_off..table_end];
734
735        // String table: contiguous block right after the symbol
736        // table. First u32 is its total size (including the field
737        // itself); names are NUL-terminated past offset 4.
738        let str_off = table_end;
739        let strtab = self.raw.get(str_off..).unwrap_or(&[]);
740
741        let mut out = Vec::new();
742        let mut i = 0usize;
743        while i < count {
744            let off = i * COFF_SYMBOL_SIZE;
745            let chunk = &table[off..off + COFF_SYMBOL_SIZE];
746            let aux_count = chunk[17] as usize;
747            let name = decode_coff_symbol_name(&chunk[0..8], strtab);
748            let value = read_u32(chunk, 8);
749            #[allow(clippy::cast_possible_wrap)]
750            let section_number = read_u16(chunk, 12) as i16;
751            let type_ = read_u16(chunk, 14);
752            let storage_class = chunk[16];
753            out.push(CoffSymbol {
754                name,
755                value,
756                section_number,
757                type_,
758                storage_class,
759                aux_count: chunk[17],
760            });
761            i = i.saturating_add(1).saturating_add(aux_count);
762        }
763        out
764    }
765
766    /// Translate an RVA to a file offset by finding the section that
767    /// contains it and computing `pointer_to_raw_data + (rva -
768    /// virtual_address)`. Returns `None` for RVAs outside every
769    /// section, or when the resulting offset would land past the
770    /// file's bytes.
771    #[must_use]
772    pub fn rva_to_file_offset(&self, rva: u32) -> Option<usize> {
773        for sh in &self.sections {
774            let start = sh.virtual_address;
775            let size = sh.virtual_size.max(sh.size_of_raw_data);
776            let end = start.checked_add(size)?;
777            if rva >= start && rva < end {
778                let off_in_section = rva - start;
779                if off_in_section >= sh.size_of_raw_data {
780                    return None; // lies inside virtual-only space
781                }
782                let file_off = sh.pointer_to_raw_data.checked_add(off_in_section)?;
783                if (file_off as usize) >= self.raw.len() {
784                    return None;
785                }
786                return Some(file_off as usize);
787            }
788        }
789        None
790    }
791
792    /// Read a slice of `len` bytes starting at the given RVA, or
793    /// `None` if it's outside the file. Convenience over
794    /// `rva_to_file_offset` + slicing.
795    #[must_use]
796    pub fn slice_at_rva(&self, rva: u32, len: usize) -> Option<&[u8]> {
797        let off = self.rva_to_file_offset(rva)?;
798        self.raw.get(off..off.checked_add(len)?)
799    }
800
801    /// Parse the Export Directory (data directory 0) if it exists
802    /// and is populated, returning one [`PeExport`] per advertised
803    /// export. An empty result either means "no export table" or
804    /// "table present but lists zero functions".
805    #[must_use]
806    pub fn exports(&self) -> Vec<PeExport> {
807        let Some(dir) = self.data_directories.get(DATA_DIR_EXPORT) else {
808            return Vec::new();
809        };
810        if dir.virtual_address == 0 || dir.size == 0 {
811            return Vec::new();
812        }
813        let Some(hdr) = self.slice_at_rva(dir.virtual_address, 40) else {
814            return Vec::new();
815        };
816        let ordinal_base = read_u32(hdr, 16);
817        let n_functions = read_u32(hdr, 20) as usize;
818        let n_names = read_u32(hdr, 24) as usize;
819        let addr_of_functions = read_u32(hdr, 28);
820        let addr_of_names = read_u32(hdr, 32);
821        let addr_of_name_ordinals = read_u32(hdr, 36);
822
823        // Build ordinal -> name map from the parallel name + ordinal
824        // arrays. Most exports are named; pure-ordinal exports leave
825        // their slot in this map empty.
826        let mut name_of_ordinal: std::collections::HashMap<u32, String> =
827            std::collections::HashMap::new();
828        if let Some(names) = self.slice_at_rva(addr_of_names, n_names.saturating_mul(4)) {
829            if let Some(ords) = self.slice_at_rva(addr_of_name_ordinals, n_names.saturating_mul(2))
830            {
831                for i in 0..n_names {
832                    let name_rva = read_u32(names, i * 4);
833                    let ord_idx = u32::from(read_u16(ords, i * 2));
834                    if let Some(name) = self.read_cstring_at_rva(name_rva) {
835                        name_of_ordinal.insert(ord_idx, name);
836                    }
837                }
838            }
839        }
840
841        let mut out = Vec::with_capacity(n_functions);
842        let Some(funcs) = self.slice_at_rva(addr_of_functions, n_functions.saturating_mul(4))
843        else {
844            return out;
845        };
846        for i in 0..n_functions {
847            let func_rva = read_u32(funcs, i * 4);
848            if func_rva == 0 {
849                continue; // empty slot (gap in the ordinal range)
850            }
851            // Forwarder exports: the RVA points into the Export
852            // Directory itself (so it's an ASCII redirect string,
853            // not real code). Skip those — they don't correspond
854            // to local code we can lift.
855            let dir_end = dir.virtual_address.wrapping_add(dir.size);
856            if func_rva >= dir.virtual_address && func_rva < dir_end {
857                continue;
858            }
859            out.push(PeExport {
860                ordinal: ordinal_base + i as u32,
861                rva: func_rva,
862                name: name_of_ordinal.get(&(i as u32)).cloned(),
863            });
864        }
865        out
866    }
867
868    /// Parse the Import Directory (data directory 1) if it exists
869    /// and is populated, returning one [`PeImport`] per IAT slot
870    /// across every imported DLL. Each entry records the slot's
871    /// run-time virtual address, the DLL the symbol comes from,
872    /// and either a name (for by-name imports) or an ordinal (for
873    /// by-ordinal imports).
874    ///
875    /// Returns an empty vector when there's no import table, or
876    /// when the table is malformed (missing INT/IAT data).
877    #[must_use]
878    pub fn imports(&self) -> Vec<PeImport> {
879        let Some(dir) = self.data_directories.get(DATA_DIR_IMPORT) else {
880            return Vec::new();
881        };
882        if dir.virtual_address == 0 || dir.size == 0 {
883            return Vec::new();
884        }
885        // Thunk entry size: 4 bytes for PE32, 8 for PE32+.
886        let thunk_size = match self.kind {
887            PeKind::Pe32 => 4usize,
888            PeKind::Pe32Plus => 8usize,
889        };
890        let mut out: Vec<PeImport> = Vec::new();
891        // Each descriptor is 20 bytes; walk until we hit the
892        // all-zero terminator.
893        let mut desc_rva = dir.virtual_address;
894        for _ in 0..1024 {
895            // Bound the walk so a malformed binary can't run forever.
896            let Some(desc) = self.slice_at_rva(desc_rva, 20) else {
897                break;
898            };
899            let original_first_thunk = read_u32(desc, 0);
900            let _time_date_stamp = read_u32(desc, 4);
901            let _forwarder_chain = read_u32(desc, 8);
902            let name_rva = read_u32(desc, 12);
903            let first_thunk = read_u32(desc, 16);
904            if original_first_thunk == 0 && name_rva == 0 && first_thunk == 0 {
905                break;
906            }
907            let dll_name = self.read_cstring_at_rva(name_rva).unwrap_or_default();
908            // Walk INT for names, IAT slot addresses come from
909            // the FirstThunk RVA + ImageBase + slot offset. If
910            // OriginalFirstThunk is zero (some linkers omit it
911            // for "bound" imports), fall back to FirstThunk.
912            let int_rva = if original_first_thunk != 0 {
913                original_first_thunk
914            } else {
915                first_thunk
916            };
917            for idx in 0..1u32 << 20 {
918                let off = (idx as usize).saturating_mul(thunk_size);
919                let Some(thunk_bytes) =
920                    self.slice_at_rva(int_rva.wrapping_add(off as u32), thunk_size)
921                else {
922                    break;
923                };
924                let thunk_val: u64 = match self.kind {
925                    PeKind::Pe32 => u64::from(read_u32(thunk_bytes, 0)),
926                    PeKind::Pe32Plus => read_u64(thunk_bytes, 0),
927                };
928                if thunk_val == 0 {
929                    break;
930                }
931                let iat_va = self.image_base + u64::from(first_thunk.wrapping_add(off as u32));
932                let ordinal_flag: u64 = match self.kind {
933                    PeKind::Pe32 => 0x8000_0000,
934                    PeKind::Pe32Plus => 0x8000_0000_0000_0000,
935                };
936                let (ordinal, name) = if thunk_val & ordinal_flag != 0 {
937                    (Some((thunk_val & 0xFFFF) as u16), None)
938                } else {
939                    // Low bits are an RVA to IMAGE_IMPORT_BY_NAME:
940                    // 2-byte hint then the NUL-terminated name.
941                    #[allow(clippy::cast_possible_truncation)]
942                    let by_name_rva = (thunk_val & 0xFFFF_FFFF) as u32;
943                    let name = self.read_cstring_at_rva(by_name_rva.wrapping_add(2));
944                    (None, name)
945                };
946                out.push(PeImport {
947                    iat_va,
948                    dll_name: dll_name.clone(),
949                    name,
950                    ordinal,
951                });
952            }
953            desc_rva = desc_rva.wrapping_add(20);
954        }
955        out
956    }
957
958    /// Read an ASCII NUL-terminated string at `rva`, capped at a
959    /// sensible upper bound to avoid runaway scans on malformed
960    /// images. Returns `None` if the RVA can't be resolved.
961    fn read_cstring_at_rva(&self, rva: u32) -> Option<String> {
962        let off = self.rva_to_file_offset(rva)?;
963        let slice = self.raw.get(off..)?;
964        let end = slice.iter().take(512).position(|&b| b == 0).unwrap_or(0);
965        if end == 0 {
966            return None;
967        }
968        std::str::from_utf8(&slice[..end]).ok().map(str::to_string)
969    }
970
971    /// Build a [`PeFile`] from already-structured pieces, without
972    /// needing a pre-existing raw buffer. Used by the source
973    /// lower path (`ud_compile::lower_to_pe`) when reading the
974    /// PE skeleton from `@module.build` and reassembling the
975    /// bytes for round-trip. The DOS stub bytes, alignment
976    /// padding, and section content arrive via the `extra_bytes`
977    /// list — each `(file_offset, bytes)` tuple is copied into
978    /// the buffer at its offset, after the structured headers
979    /// are written.
980    #[must_use]
981    #[allow(clippy::too_many_arguments)]
982    pub fn from_parts(
983        kind: PeKind,
984        dos: DosHeader,
985        dos_stub: Vec<u8>,
986        coff: CoffHeader,
987        optional: Option<OptionalHeader>,
988        image_base: u64,
989        address_of_entry_point: u32,
990        data_directories: Vec<DataDirectory>,
991        sections: Vec<SectionHeader>,
992        extra_bytes: Vec<(u64, Vec<u8>)>,
993        file_size: u64,
994    ) -> Self {
995        let mut raw = vec![0u8; file_size as usize];
996        // Lay the DOS stub down first so write_to_vec's pass
997        // can overlay the structured DOS header on top.
998        if !dos_stub.is_empty() && raw.len() >= DOS_HEADER_SIZE + dos_stub.len() {
999            raw[DOS_HEADER_SIZE..DOS_HEADER_SIZE + dos_stub.len()].copy_from_slice(&dos_stub);
1000        }
1001        for (off, bytes) in extra_bytes {
1002            let off = off as usize;
1003            let end = off + bytes.len();
1004            if end <= raw.len() {
1005                raw[off..end].copy_from_slice(&bytes);
1006            }
1007        }
1008        let e_lfanew = dos.e_lfanew;
1009        let mut file = Self {
1010            kind,
1011            e_lfanew,
1012            dos,
1013            dos_stub,
1014            coff,
1015            optional,
1016            image_base,
1017            address_of_entry_point,
1018            data_directories,
1019            sections,
1020            raw,
1021        };
1022        // Overwrite the buffer with the canonical structured
1023        // encoding so any drift between the supplied raw bytes
1024        // and the structured fields lands the structured value.
1025        file.raw = file.write_to_vec();
1026        file
1027    }
1028
1029    /// Serialize back to bytes. Always byte-identical to the
1030    /// parsed input — the structured DOS/optional/section headers
1031    /// are encoded back into the buffer over a base copy of the
1032    /// original raw bytes, so any field not covered by a
1033    /// structured field (DOS stub bytes, alignment padding,
1034    /// section content) rides through verbatim. Useful for
1035    /// callers that edit a structured field (e.g. bump the
1036    /// optional header's CheckSum) and want the rebuilt bytes.
1037    #[must_use]
1038    pub fn write_to_vec(&self) -> Vec<u8> {
1039        let mut out = self.raw.clone();
1040        // DOS header — always 64 bytes at offset 0.
1041        if out.len() >= DOS_HEADER_SIZE {
1042            out[..DOS_HEADER_SIZE].copy_from_slice(&self.dos.encode());
1043        }
1044        let pe_off = self.e_lfanew as usize;
1045        if pe_off + 4 <= out.len() {
1046            out[pe_off..pe_off + 4].copy_from_slice(&PE_SIGNATURE);
1047        }
1048        let coff_off = pe_off + 4;
1049        if coff_off + COFF_HEADER_SIZE <= out.len() {
1050            out[coff_off..coff_off + COFF_HEADER_SIZE].copy_from_slice(&self.coff.encode());
1051        }
1052        let opt_off = coff_off + COFF_HEADER_SIZE;
1053        if let Some(opt) = self.optional.as_ref() {
1054            let opt_bytes = opt.encode();
1055            if opt_off + opt_bytes.len() <= out.len() {
1056                out[opt_off..opt_off + opt_bytes.len()].copy_from_slice(&opt_bytes);
1057            }
1058            let dd_off = opt_off + opt_bytes.len();
1059            for (i, dd) in self.data_directories.iter().enumerate() {
1060                let off = dd_off + i * 8;
1061                if off + 8 > out.len() {
1062                    break;
1063                }
1064                out[off..off + 4].copy_from_slice(&dd.virtual_address.to_le_bytes());
1065                out[off + 4..off + 8].copy_from_slice(&dd.size.to_le_bytes());
1066            }
1067        }
1068        let sec_off = opt_off + self.coff.size_of_optional_header as usize;
1069        for (i, sh) in self.sections.iter().enumerate() {
1070            let off = sec_off + i * SECTION_HEADER_SIZE;
1071            if off + SECTION_HEADER_SIZE > out.len() {
1072                break;
1073            }
1074            out[off..off + SECTION_HEADER_SIZE].copy_from_slice(&sh.encode());
1075        }
1076        out
1077    }
1078}
1079
/// One entry from a PE file's Import Address Table (IAT). Names
/// the imported symbol the loader will patch into `iat_va` at
/// run time.
///
/// An import is either by-name or by-ordinal, so [`PeFile::imports`]
/// sets at most one of `name` and `ordinal`. Both are `None` when a
/// by-name entry's name string could not be read from the file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PeImport {
    /// Run-time virtual address of the IAT slot — what the
    /// loader writes the resolved function pointer into, and
    /// what `call dword ptr [iat_va]` references in code.
    pub iat_va: u64,
    /// Name of the DLL that provides the symbol (e.g. `"KERNEL32.dll"`).
    /// Empty when the import descriptor's Name RVA didn't
    /// resolve to a readable string.
    pub dll_name: String,
    /// Symbol name when the import is by-name and the name could be
    /// read.
    pub name: Option<String>,
    /// Ordinal when the import is by-ordinal. Common for some
    /// system DLLs (e.g. WS2_32 uses ordinals for many entries).
    pub ordinal: Option<u16>,
}
1100
/// One entry from a PE file's Export Address Table.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PeExport {
    /// The export's ordinal (Base + index). Always present in the
    /// EAT.
    pub ordinal: u32,
    /// RVA of the export's code. Forwarder entries (which point at
    /// a redirect string instead) are excluded by [`PeFile::exports`].
    pub rva: u32,
    /// Symbolic name when the export has one; `None` for ordinal-
    /// only exports.
    pub name: Option<String>,
}
1114
1115/// Returns true if `bytes` look like a PE file (start with the DOS
1116/// `MZ` magic and have a parseable `e_lfanew`).
1117#[must_use]
1118pub fn is_pe(bytes: &[u8]) -> bool {
1119    bytes.len() >= DOS_HEADER_SIZE && bytes[..2] == DOS_MAGIC
1120}
1121
1122impl CoffHeader {
1123    /// Encode the 20-byte COFF header.
1124    #[must_use]
1125    pub fn encode(&self) -> [u8; 20] {
1126        let mut out = [0u8; 20];
1127        out[0..2].copy_from_slice(&self.machine.to_le_bytes());
1128        out[2..4].copy_from_slice(&self.number_of_sections.to_le_bytes());
1129        out[4..8].copy_from_slice(&self.time_date_stamp.to_le_bytes());
1130        out[8..12].copy_from_slice(&self.pointer_to_symbol_table.to_le_bytes());
1131        out[12..16].copy_from_slice(&self.number_of_symbols.to_le_bytes());
1132        out[16..18].copy_from_slice(&self.size_of_optional_header.to_le_bytes());
1133        out[18..20].copy_from_slice(&self.characteristics.to_le_bytes());
1134        out
1135    }
1136}
1137
1138impl SectionHeader {
1139    /// Encode the 40-byte section header.
1140    #[must_use]
1141    pub fn encode(&self) -> [u8; SECTION_HEADER_SIZE] {
1142        let mut out = [0u8; SECTION_HEADER_SIZE];
1143        out[0..8].copy_from_slice(&self.name);
1144        out[8..12].copy_from_slice(&self.virtual_size.to_le_bytes());
1145        out[12..16].copy_from_slice(&self.virtual_address.to_le_bytes());
1146        out[16..20].copy_from_slice(&self.size_of_raw_data.to_le_bytes());
1147        out[20..24].copy_from_slice(&self.pointer_to_raw_data.to_le_bytes());
1148        out[24..28].copy_from_slice(&self.pointer_to_relocations.to_le_bytes());
1149        out[28..32].copy_from_slice(&self.pointer_to_linenumbers.to_le_bytes());
1150        out[32..34].copy_from_slice(&self.number_of_relocations.to_le_bytes());
1151        out[34..36].copy_from_slice(&self.number_of_linenumbers.to_le_bytes());
1152        out[36..40].copy_from_slice(&self.characteristics.to_le_bytes());
1153        out
1154    }
1155}
1156
1157fn parse_coff_header(bytes: &[u8]) -> CoffHeader {
1158    debug_assert!(bytes.len() >= COFF_HEADER_SIZE);
1159    CoffHeader {
1160        machine: read_u16(bytes, 0),
1161        number_of_sections: read_u16(bytes, 2),
1162        time_date_stamp: read_u32(bytes, 4),
1163        pointer_to_symbol_table: read_u32(bytes, 8),
1164        number_of_symbols: read_u32(bytes, 12),
1165        size_of_optional_header: read_u16(bytes, 16),
1166        characteristics: read_u16(bytes, 18),
1167    }
1168}
1169
1170fn parse_section_header(bytes: &[u8]) -> SectionHeader {
1171    debug_assert!(bytes.len() >= SECTION_HEADER_SIZE);
1172    let mut name = [0u8; 8];
1173    name.copy_from_slice(&bytes[0..8]);
1174    SectionHeader {
1175        name,
1176        virtual_size: read_u32(bytes, 8),
1177        virtual_address: read_u32(bytes, 12),
1178        size_of_raw_data: read_u32(bytes, 16),
1179        pointer_to_raw_data: read_u32(bytes, 20),
1180        pointer_to_relocations: read_u32(bytes, 24),
1181        pointer_to_linenumbers: read_u32(bytes, 28),
1182        number_of_relocations: read_u16(bytes, 32),
1183        number_of_linenumbers: read_u16(bytes, 34),
1184        characteristics: read_u32(bytes, 36),
1185    }
1186}
1187
1188fn ensure_len(bytes: &[u8], offset: u64, needed: u64) -> Result<()> {
1189    let end = offset
1190        .checked_add(needed)
1191        .ok_or_else(|| Error::RegionOverflow {
1192            label: "ensure_len".into(),
1193            offset,
1194            size: needed,
1195        })?;
1196    if end > bytes.len() as u64 {
1197        return Err(Error::Truncated {
1198            offset,
1199            needed,
1200            have: bytes.len() as u64,
1201        });
1202    }
1203    Ok(())
1204}
1205
/// Read a little-endian `u16` at byte offset `off`.
///
/// Panics if `bytes` does not contain two bytes at that offset.
fn read_u16(bytes: &[u8], off: usize) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&bytes[off..off + 2]);
    u16::from_le_bytes(raw)
}
1209
/// Read a little-endian `u32` at byte offset `off`.
///
/// Panics if `bytes` does not contain four bytes at that offset.
fn read_u32(bytes: &[u8], off: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&bytes[off..off + 4]);
    u32::from_le_bytes(raw)
}
1213
/// Read a little-endian `u64` at byte offset `off`.
///
/// Panics if `bytes` does not contain eight bytes at that offset.
fn read_u64(bytes: &[u8], off: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&bytes[off..off + 8]);
    u64::from_le_bytes(raw)
}
1217
/// Decode the 8-byte name field of a COFF symbol entry.
///
/// The field uses one of two encodings:
///
/// * Short name (≤ 8 bytes): the field holds the name itself,
///   NUL-padded when shorter than 8 bytes. It is cut at the first NUL.
/// * Long name (> 8 bytes): the first 4 bytes are zero and the last 4
///   are a little-endian `u32` offset into `strtab`. The name runs
///   from that offset to the next NUL (or to the end of `strtab`).
///
/// Returns an empty string if the selected bytes aren't valid UTF-8 or
/// the long-name offset lies past the end of the string table.
fn decode_coff_symbol_name(name: &[u8], strtab: &[u8]) -> String {
    debug_assert!(name.len() == 8);
    // Pick the byte run that holds the NUL-terminated name.
    let encoded: &[u8] = if name[0..4] == [0u8; 4] {
        let mut raw_offset = [0u8; 4];
        raw_offset.copy_from_slice(&name[4..8]);
        let offset = u32::from_le_bytes(raw_offset) as usize;
        match strtab.get(offset..) {
            Some(tail) => tail,
            None => return String::new(),
        }
    } else {
        name
    };
    // `split` always yields a first piece: everything before the
    // first NUL, or the whole run when no NUL is present.
    let text = encoded.split(|&byte| byte == 0).next().unwrap_or(&[]);
    std::str::from_utf8(text)
        .map(str::to_owned)
        .unwrap_or_default()
}
1249
#[cfg(test)]
mod tests {
    use super::*;

    /// Value the synthetic image stores in `e_lfanew`, i.e. the file
    /// offset of its PE signature.
    const SYNTHETIC_PE_OFFSET: u32 = 0x40;

    /// Overwrite the `e_lfanew` field of `image` with `value`.
    fn set_lfanew(image: &mut [u8], value: u32) {
        image[E_LFANEW_OFFSET..E_LFANEW_OFFSET + 4].copy_from_slice(&value.to_le_bytes());
    }

    /// Smallest synthetic PE: DOS header → PE signature → COFF header
    /// (no sections, no optional header). Used to exercise the
    /// structural-validation code paths in isolation.
    fn minimal_pe_bytes() -> Vec<u8> {
        let sig_off = SYNTHETIC_PE_OFFSET as usize;
        let coff_off = sig_off + PE_SIGNATURE.len();
        // 0x80 zero bytes leave room past the signature for the whole
        // 20-byte COFF header.
        let mut image = vec![0u8; 0x80];
        image[..DOS_MAGIC.len()].copy_from_slice(&DOS_MAGIC);
        set_lfanew(&mut image, SYNTHETIC_PE_OFFSET);
        image[sig_off..coff_off].copy_from_slice(&PE_SIGNATURE);
        // Machine = i386; every other COFF field stays zero.
        image[coff_off..coff_off + 2].copy_from_slice(&IMAGE_FILE_MACHINE_I386.to_le_bytes());
        image
    }

    #[test]
    fn parses_minimal_pe() {
        let image = minimal_pe_bytes();
        let parsed = PeFile::parse(&image).unwrap();
        assert_eq!(parsed.coff.machine, IMAGE_FILE_MACHINE_I386);
        assert_eq!(parsed.coff.number_of_sections, 0);
        assert!(parsed.sections.is_empty());
    }

    #[test]
    fn round_trips_minimal_pe() {
        let image = minimal_pe_bytes();
        let rebuilt = PeFile::parse(&image).unwrap().write_to_vec();
        assert_eq!(rebuilt, image);
    }

    #[test]
    fn rejects_bad_dos_magic() {
        let mut image = minimal_pe_bytes();
        image[0] = b'X';
        let err = PeFile::parse(&image).unwrap_err();
        assert!(matches!(err, Error::BadDosMagic(_)));
    }

    #[test]
    fn rejects_bad_pe_signature() {
        let mut image = minimal_pe_bytes();
        image[0x40] = b'X';
        let err = PeFile::parse(&image).unwrap_err();
        assert!(matches!(err, Error::BadPeSignature(_)));
    }

    #[test]
    fn rejects_lfanew_past_end() {
        let mut image = minimal_pe_bytes();
        set_lfanew(&mut image, 0xffff_ffff);
        let err = PeFile::parse(&image).unwrap_err();
        assert!(matches!(err, Error::LfanewOutOfRange { .. }));
    }

    #[test]
    fn is_pe_recognises_dos_header() {
        assert!(is_pe(&minimal_pe_bytes()));
    }

    #[test]
    fn is_pe_rejects_short_input() {
        let too_short = [0u8; 10];
        assert!(!is_pe(&too_short));
    }

    #[test]
    fn dos_header_encode_round_trip() {
        let image = minimal_pe_bytes();
        let original = &image[..64];
        let encoded = DosHeader::parse(original).encode();
        assert_eq!(&encoded[..], original);
    }

    #[test]
    fn optional_header_encode_round_trip_against_fixture() {
        // The synthetic `minimal_pe_bytes` has no optional header, so
        // this runs against a real PE fixture when one is available
        // and skips when running against a stripped tree.
        let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
        let Some(root) = manifest_dir
            .ancestors()
            .find(|dir| dir.join("testdata").is_dir())
        else {
            eprintln!("note: testdata/ unavailable; skipping");
            return;
        };
        let path = root.join("testdata/sqrt-mingw15-O0.exe");
        let bytes = match std::fs::read(&path) {
            Ok(bytes) => bytes,
            Err(_) => {
                eprintln!("note: {} unavailable; skipping", path.display());
                return;
            }
        };
        let pe = PeFile::parse(&bytes).expect("parse fixture");
        let opt = pe
            .optional
            .as_ref()
            .expect("fixture should have an optional header");
        // Length of the encoded optional header for each PE flavour.
        let expected_len = match pe.kind {
            PeKind::Pe32 => 96,
            PeKind::Pe32Plus => 112,
        };
        let start = pe.e_lfanew as usize + 4 + COFF_HEADER_SIZE;
        let encoded = opt.encode();
        assert_eq!(encoded.len(), expected_len);
        assert_eq!(&encoded[..], &bytes[start..start + expected_len]);
    }
}