1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
#![allow(clippy::nonstandard_macro_braces)] // clippy bug, see https://github.com/rust-lang/rust-clippy/issues/7434

use anyhow::Result;
use log::debug;
use thiserror::Error;

pub mod imports;
pub mod rsrc;

use crate::{
    arch::Arch,
    aspace::RelativeAddressSpace,
    module::{Module, Permissions, Section},
    util, RVA, VA,
};

/// Errors specific to parsing and loading PE files.
#[derive(Error, Debug)]
pub enum PEError {
    /// The input is a kind of file this loader declines to handle.
    /// The payload names the unsupported format (for example, `get_pe` uses ".NET assembly").
    #[error("format not supported: {0}")]
    FormatNotSupported(String),

    /// The input is not a well-formed PE file.
    /// The payload is free-form text appended to the error message.
    #[error("malformed PE file: {0}")]
    MalformedPEFile(String),
}

// Indices into the optional header's data directory table,
// suitable for passing to `PE::get_data_directory`.
// ref: https://docs.microsoft.com/en-us/windows/win32/api/dbghelp/nf-dbghelp-imagedirectoryentrytodata#parameters
pub const IMAGE_DIRECTORY_ENTRY_EXPORT: usize = 0;
pub const IMAGE_DIRECTORY_ENTRY_IMPORT: usize = 1;
pub const IMAGE_DIRECTORY_ENTRY_RESOURCE: usize = 2;
pub const IMAGE_DIRECTORY_ENTRY_EXCEPTION: usize = 3;
pub const IMAGE_DIRECTORY_ENTRY_SECURITY: usize = 4;
pub const IMAGE_DIRECTORY_ENTRY_BASERELOC: usize = 5;
pub const IMAGE_DIRECTORY_ENTRY_DEBUG: usize = 6;
pub const IMAGE_DIRECTORY_ENTRY_ARCHITECTURE: usize = 7;
pub const IMAGE_DIRECTORY_ENTRY_GLOBALPTR: usize = 8;
pub const IMAGE_DIRECTORY_ENTRY_TLS: usize = 9;
pub const IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG: usize = 10;
pub const IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT: usize = 11;
pub const IMAGE_DIRECTORY_ENTRY_IAT: usize = 12;
pub const IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT: usize = 13;
pub const IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR: usize = 14;
/// The largest directory index that `PE::get_data_directory` accepts;
/// it asserts that its argument does not exceed this value.
pub const IMAGE_DIRECTORY_MAX: usize = IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR;

/// The location of one data directory, as produced by `PE::get_data_directory`.
pub struct DataDirectory {
    /// Absolute address of the directory:
    /// the module's base address plus the directory's virtual address from the header.
    pub address: VA,
    /// The directory's `size` field from the header (presumably a byte count).
    pub size:    RVA,
}

/// A parsed and loaded PE file.
/// The `buf` field contains the raw data.
/// The `module` field contains an address space as the PE would be loaded.
/// The `header` field contains the PE header as parsed by goblin.
pub struct PE {
    /// Owned copy of the bytes the PE was loaded from.
    pub buf:    Vec<u8>,
    /// Architecture, section list, and the address space populated by the loader.
    pub module: Module,
    /// Header taken from the goblin parse result at load time.
    pub header: goblin::pe::header::Header,
}

impl PE {
    /// Parse the given bytes as a PE file and load it into an address space.
    ///
    /// # Errors
    /// Returns whatever error the loader reports for `buf`.
    pub fn from_bytes(buf: &[u8]) -> Result<PE> {
        load_pe(buf)
    }

    /// Iterate over the loaded sections whose permissions include execute.
    pub fn executable_sections<'b>(&'b self) -> Box<dyn Iterator<Item = &Section> + 'b> {
        let executable = self
            .module
            .sections
            .iter()
            .filter(|sec| sec.permissions.intersects(Permissions::X));

        Box::new(executable)
    }

    /// Re-parse the raw bytes held in `self.buf` with goblin.
    ///
    /// # Errors
    /// Fails if goblin cannot parse the buffer or the file is a .NET assembly.
    pub fn pe(&self) -> Result<goblin::pe::PE> {
        get_pe(self.buf.as_slice())
    }

    /// Look up a data directory by index (one of the `IMAGE_DIRECTORY_ENTRY_*` constants).
    ///
    /// Returns `Ok(None)` when the file has no optional header
    /// or when the requested directory entry is absent.
    ///
    /// # Panics
    /// Panics if `data_directory` is greater than `IMAGE_DIRECTORY_MAX`.
    pub fn get_data_directory(&self, data_directory: usize) -> Result<Option<DataDirectory>> {
        assert!(data_directory <= IMAGE_DIRECTORY_MAX);

        let base_address = self.module.address_space.base_address;

        let found = self
            .header
            .optional_header
            .and_then(|opt_header| opt_header.data_directories.data_directories[data_directory])
            .map(|directory| DataDirectory {
                // the header stores an RVA; report an absolute address.
                address: base_address + directory.virtual_address as VA,
                size:    directory.size as RVA,
            });

        Ok(found)
    }
}

/// Parse `buf` with goblin, refusing .NET assemblies.
///
/// # Errors
/// Propagates goblin's parse error, or returns `PEError::FormatNotSupported`
/// when the optional header carries a CLR runtime header directory.
fn get_pe(buf: &[u8]) -> Result<goblin::pe::PE> {
    let parsed = goblin::pe::PE::parse(buf)?;

    let has_clr_header = parsed
        .header
        .optional_header
        .as_ref()
        .map_or(false, |opt| opt.data_directories.get_clr_runtime_header().is_some());

    if has_clr_header {
        return Err(PEError::FormatNotSupported(".NET assembly".to_string()).into());
    }

    Ok(parsed)
}

/// Describe the PE header region as a read-only pseudo-section named "header".
///
/// The physical range starts at file offset 0 and covers the declared header size,
/// capped at the length of `buf`. The virtual range starts at `base_address`.
// The `Result` wrapper is never `Err` today; presumably it is kept so the
// signature lines up with the other loader helpers.
#[allow(clippy::unnecessary_wraps)]
fn load_pe_header(buf: &[u8], pe: &goblin::pe::PE, base_address: VA) -> Result<Section> {
    // assumption: without an optional header, the header is at most 0x200 bytes.
    let declared_size = pe
        .header
        .optional_header
        .as_ref()
        .map_or(0x200, |opt| opt.windows_fields.size_of_headers);

    //   on disk:
    //
    //   +---------------------------------+
    //   |   header        |  sections...  |
    //   +---------------------------------+
    //   .                  \
    //   .  in memory:       \
    //   .                    \
    //   +-----------------+---+        +-------------
    //   |   header        |   |        |  sections...
    //   +-----------------+---+        +-------------
    //                     ^   ^
    //                     |   +--- virtual size
    //                     |        aligned to 0x200
    //                     +-- raw size
    //                         no alignment

    // never claim more header bytes than the file actually has.
    let raw_size = (declared_size as usize).min(buf.len());
    let virt_size = util::align(raw_size as u64, 0x200);

    let physical_range = 0x0..(raw_size as u64);
    let virtual_range = base_address..(base_address + virt_size);

    Ok(Section {
        physical_range,
        virtual_range,
        permissions: Permissions::R,
        name: "header".to_string(),
    })
}

/// The section can be executed as code.
const IMAGE_SCN_MEM_EXECUTE: u32 = 0x2000_0000;

/// The section can be read.
const IMAGE_SCN_MEM_READ: u32 = 0x4000_0000;

/// The section can be written to.
const IMAGE_SCN_MEM_WRITE: u32 = 0x8000_0000;

/// Page granularity used by the loader:
/// the fallback section alignment when there is no optional header,
/// and the alignment applied when sizing the address space.
const PAGE_SIZE: u64 = 0x1000;

/// Translate one PE section table entry into the loader's `Section` description.
///
/// - `name`: the raw section name decoded lossily as UTF-8, with trailing NULs and
///   trailing whitespace removed, then cut at the first embedded NUL.
/// - `physical_range`: file offsets `pointer_to_raw_data .. pointer_to_raw_data + size_of_raw_data`.
/// - `virtual_range`: starts at `base_address + virtual_address`; its length is the
///   section's virtual size passed through `util::align` with `section_alignment`.
/// - `permissions`: derived from the `IMAGE_SCN_MEM_*` bits of `characteristics`.
///
/// The physical range is not validated against the file size here.
// The `Result` wrapper is never `Err` today.
#[allow(clippy::unnecessary_wraps)]
fn load_pe_section(
    base_address: VA,
    section_alignment: u64,
    section: &goblin::pe::section_table::SectionTable,
) -> Result<Section> {
    let section_name = String::from_utf8_lossy(&section.name[..]);

    let trimmed_name = section_name.trim_end_matches('\u{0}').trim_end();

    let name = trimmed_name
        .split_once('\u{0}')
        .map(|(name, _)| name)
        .unwrap_or(trimmed_name)
        .to_string();

    let virtual_size = util::align(section.virtual_size as u64, section_alignment);

    let mut perms = Permissions::empty();
    if section.characteristics & IMAGE_SCN_MEM_READ != 0 {
        perms.insert(Permissions::R);
    }
    if section.characteristics & IMAGE_SCN_MEM_WRITE != 0 {
        perms.insert(Permissions::W);
    }
    if section.characteristics & IMAGE_SCN_MEM_EXECUTE != 0 {
        perms.insert(Permissions::X);
    }

    let virtual_start = base_address + section.virtual_address as u64;

    debug!("pe: section: {} at {:#x}", name, virtual_start);

    // Widen before adding: both fields are 32-bit and attacker-controlled,
    // so their sum can exceed u32::MAX. In u64 the sum cannot overflow.
    let physical_start = section.pointer_to_raw_data as u64;
    let physical_end = physical_start + section.size_of_raw_data as u64;

    Ok(Section {
        physical_range: std::ops::Range {
            start: physical_start,
            end:   physical_end,
        },
        virtual_range: std::ops::Range {
            start: virtual_start,
            end:   virtual_start + virtual_size,
        },
        permissions: perms,
        name,
    })
}

// lots of further detail here: https://github.com/corkami/docs/blob/master/PE/PE.md
/// Parse `buf` as a PE file and map its header and sections into a new address space.
///
/// The base address and section alignment come from the optional header;
/// when there is none, the defaults are `0x40_0000` and `PAGE_SIZE`.
/// Each section is written at its virtual address relative to the base,
/// with any part not backed by file data left as zero bytes.
/// Raw data ranges that extend past the end of `buf` are clamped to the
/// bytes actually present rather than causing a panic.
///
/// # Errors
/// Fails if `get_pe` rejects the input, if writing to the address space fails,
/// or if the address space cannot be rebased to `base_address`.
fn load_pe(buf: &[u8]) -> Result<PE> {
    let pe = get_pe(buf)?;

    let arch = if pe.is_64 { Arch::X64 } else { Arch::X32 };
    debug!("pe: arch: {:?}", arch);

    let (base_address, section_alignment) = match pe.header.optional_header {
        Some(opt) => (
            opt.windows_fields.image_base,
            opt.windows_fields.section_alignment as u64,
        ),
        _ => {
            debug!("pe: base address: using default: 0x400000");
            (0x40_0000, PAGE_SIZE)
        }
    };
    debug!("pe: base address: {:#x}", base_address);

    // the header is treated as the first (pseudo-)section.
    let mut sections = vec![load_pe_header(buf, &pe, base_address)?];
    for section in pe.sections.iter() {
        sections.push(load_pe_section(base_address, section_alignment, section)?);
    }

    let max_address = sections
        .iter()
        .map(|sec| sec.virtual_range.end)
        .max()
        .expect("sections always contains at least the header");
    let max_page_address = util::align(max_address, PAGE_SIZE) - base_address;
    debug!("pe: address space: capacity: {:#x}", max_page_address);

    let mut address_space = RelativeAddressSpace::with_capacity(max_page_address);

    let file_size = buf.len() as u64;
    for section in sections.iter() {
        // Clamp the physical range to the file: a truncated or malformed PE may
        // declare raw data beyond the end of the buffer. The comparison is done
        // in u64 so the casts back to usize cannot truncate.
        let pend = std::cmp::min(section.physical_range.end, file_size) as usize;
        let pstart = std::cmp::min(section.physical_range.start, pend as u64) as usize;
        if (pend as u64) < section.physical_range.end {
            debug!(
                "pe: section: {}: raw data extends past end of file, truncating",
                section.name
            );
        }
        let pbuf = &buf[pstart..pend];

        // the section range contains VAs,
        // while we're writing to the RelativeAddressSpace.
        // so shift down by `base_address`.
        let vstart = section.virtual_range.start;
        let rstart = vstart - base_address;
        let vsize = util::align(
            section.virtual_range.end - section.virtual_range.start,
            section_alignment,
        );
        let vend = vstart + vsize;
        let mut vbuf = vec![0u8; vsize as usize];

        // when vsize > psize, the tail of vbuf stays zeroed (NULL padding).
        // when psize > vsize, vsize wins and only a prefix of the physical data is read.
        let copy_len = std::cmp::min(pbuf.len(), vbuf.len());
        vbuf[..copy_len].copy_from_slice(&pbuf[..copy_len]);

        address_space.map.writezx(rstart, &vbuf)?;

        debug!(
            "pe: address space: mapped {:#x} - {:#x} {:?}",
            vstart, vend, section.permissions
        );
    }

    let module = Module {
        arch,
        sections,
        address_space: address_space.into_absolute(base_address)?,
    };

    debug!("pe: loaded");
    Ok(PE {
        buf: buf.to_vec(),
        module,
        header: pe.header,
    })
}

// Loader smoke tests against the bundled sample binaries.
#[cfg(test)]
mod tests {
    use anyhow::Result;

    use crate::{aspace::AddressSpace, rsrc::*};

    // the loaded module reports the image base from the optional header.
    #[test]
    fn base_address() -> Result<()> {
        let buf = get_buf(Rsrc::K32);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;

        assert_eq!(0x1_8000_0000, pe.module.address_space.base_address);

        Ok(())
    }

    // the "MZ" magic is readable both by relative and by absolute address.
    #[test]
    fn mz_header() -> Result<()> {
        let buf = get_buf(Rsrc::K32);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;

        // relative read
        assert_eq!(0x4d, pe.module.address_space.relative.read_u8(0x0)?);
        assert_eq!(0x5a, pe.module.address_space.relative.read_u8(0x1)?);

        // absolute read
        assert_eq!(0x4d, pe.module.address_space.read_u8(0x1_8000_0000 + 0x0)?);
        assert_eq!(0x5a, pe.module.address_space.read_u8(0x1_8000_0000 + 0x1)?);

        Ok(())
    }

    // each of the following loads one sample and checks for the "MZ" magic.

    #[test]
    fn k32() -> Result<()> {
        let buf = get_buf(Rsrc::K32);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;

        assert_eq!(0x4d, pe.module.address_space.relative.read_u8(0x0)?);
        assert_eq!(0x5a, pe.module.address_space.relative.read_u8(0x1)?);

        Ok(())
    }

    #[test]
    fn tiny() -> Result<()> {
        let buf = get_buf(Rsrc::TINY);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;

        assert_eq!(0x4d, pe.module.address_space.relative.read_u8(0x0)?);
        assert_eq!(0x5a, pe.module.address_space.relative.read_u8(0x1)?);

        Ok(())
    }

    #[test]
    fn nop() -> Result<()> {
        let buf = get_buf(Rsrc::NOP);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;

        assert_eq!(0x4d, pe.module.address_space.relative.read_u8(0x0)?);
        assert_eq!(0x5a, pe.module.address_space.relative.read_u8(0x1)?);

        Ok(())
    }

    #[test]
    fn mimi() -> Result<()> {
        let buf = get_buf(Rsrc::MIMI);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;

        assert_eq!(0x4d, pe.module.address_space.relative.read_u8(0x0)?);
        assert_eq!(0x5a, pe.module.address_space.relative.read_u8(0x1)?);

        Ok(())
    }

    // this demonstrates that the PE will be loaded and sections padded out to their
    // virtual range.
    #[test]
    fn read_each_section() -> Result<()> {
        let buf = get_buf(Rsrc::K32);
        let pe = crate::loader::pe::PE::from_bytes(&buf)?;

        for section in pe.module.sections.iter() {
            let start = section.virtual_range.start;
            let size = section.virtual_range.end - section.virtual_range.start;
            // build the failure message only when the read actually fails.
            pe.module
                .address_space
                .read_bytes(start, size as usize)
                .unwrap_or_else(|e| {
                    panic!(
                        "read section {} {:#x} {:#x}: {:?}",
                        section.name, start, size, e
                    )
                });
        }

        Ok(())
    }
}