malwaredb_types/exec/pef/
mod.rs

// SPDX-License-Identifier: Apache-2.0
2
3use crate::exec::{Architecture, ExecutableFile, OperatingSystem, Section, Sections};
4use crate::utils::{
5    bytes_offset_match, i32_from_offset, string_from_offset, u16_from_offset, u32_from_offset,
6    EntropyCalc,
7};
8use crate::{Ordering, SpecimenFile};
9
10use std::fmt::{Display, Formatter};
11use std::mem::size_of;
12
13use anyhow::{bail, Result};
14use chrono::{DateTime, NaiveDateTime, TimeDelta, Utc};
15use tracing::instrument;
16
// Documentation:
// https://web.archive.org/web/20020219190852/http://developer.apple.com/techpubs/mac/runtimehtml/RTArch-91.html#HEADING=91-0
19
/// File signature expected at offset 0: the ASCII bytes `Joy!peff`.
const MAGIC: [u8; 8] = *b"Joy!peff";
/// Architecture tag (ASCII `pwpc`) checked at offset 8 to detect PowerPC binaries.
const PWPC: [u8; 4] = *b"pwpc";
/// Architecture tag (ASCII `m68k`) checked at offset 8 to detect Motorola 68k binaries.
const M68K: [u8; 4] = *b"m68k";

/// Offset of the first section header, i.e. the number of bytes skipped at the start of the file.
const HEADER_SIZE: usize = 40;
/// Distance in bytes between the starts of two consecutive section headers.
const SECTION_HEADER_SIZE: usize = 28;
26
27/// The struct for [Preferred Executables](https://en.wikipedia.org/wiki/Preferred_Executable_Format).
28///
29/// This was the binary format for "Classic" Mac OS, and Be OS on Power PC. Some data is only
30/// on the "resource fork", which is not available on modern systems, so we can't the entire file. :(
31#[derive(Clone, Debug, PartialEq)]
32pub struct Pef<'a> {
33    /// Instruction set architecture for this binary
34    pub arch: Option<Architecture>,
35
36    /// Byte ordering for this binary
37    pub ordering: Ordering,
38
39    /// Operating System for this binary, likely Classic Mac OS
40    pub os: OperatingSystem,
41
42    /// Sections of this binary
43    pub sections: Option<Sections<'a>>,
44
45    /// Seconds since 1 January 1904
46    pub timestamp: u32,
47
48    /// The array containing the raw bytes used to parse this program
49    pub contents: &'a [u8],
50}
51
/// PEF section header
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct SectionHeader {
    /// Location in the file for the section name, or `None` if the section is unnamed
    pub name_offset: Option<usize>,

    /// Linker's preferred memory address for loading the binary
    pub default_address: u32,

    /// Total section size in memory at run-time
    pub total_size: u32,

    /// Size of the executable code, or data to be initialized at run-time after decompression
    pub unpacked_size: u32,

    /// Size of the section
    pub packed_size: u32,

    /// Location in the file where the section begins
    pub container_offset: u32,

    /// Attributes of the section
    pub section_kind: u8,

    /// Indicates how data might be shared at run-time
    pub share_kind: u8,

    /// Alignment of bytes in memory
    pub alignment: u8,

    /// Reserved, should be zero
    pub reserved: u8,
}
85
86impl AsRef<[u8; size_of::<Self>()]> for SectionHeader {
87    #[allow(clippy::transmute_ptr_to_ptr)]
88    fn as_ref(&self) -> &[u8; size_of::<Self>()] {
89        unsafe { std::mem::transmute::<_, &[u8; size_of::<Self>()]>(self) }
90    }
91}
92
93impl SectionHeader {
94    /// Section header from a sequence of bytes
95    #[must_use]
96    pub fn from(contents: &[u8]) -> Self {
97        Self {
98            name_offset: {
99                let val = i32_from_offset(contents, 0, Ordering::BigEndian).unwrap_or_default();
100                if val > 0 {
101                    #[allow(clippy::cast_sign_loss)]
102                    Some(val as usize)
103                } else {
104                    None
105                }
106            },
107            default_address: u32_from_offset(contents, 4, Ordering::BigEndian).unwrap_or_default(),
108            total_size: u32_from_offset(contents, 8, Ordering::BigEndian).unwrap_or_default(),
109            unpacked_size: u32_from_offset(contents, 12, Ordering::BigEndian).unwrap_or_default(),
110            packed_size: u32_from_offset(contents, 16, Ordering::BigEndian).unwrap_or_default(),
111            container_offset: u32_from_offset(contents, 20, Ordering::BigEndian)
112                .unwrap_or_default(),
113            section_kind: contents[24],
114            share_kind: contents[25],
115            alignment: contents[26],
116            reserved: contents[27],
117        }
118    }
119}
120
121impl<'a> Pef<'a> {
122    /// Parsed PEF from a sequence of bytes
123    ///
124    /// # Errors
125    ///
126    /// Returns an error if parsing fails.
127    #[instrument(name = "PEF parser", skip(contents))]
128    pub fn from(contents: &'a [u8]) -> Result<Self> {
129        if !bytes_offset_match(contents, 0, &MAGIC) {
130            bail!("Not a PEF file");
131        }
132
133        let arch = {
134            if bytes_offset_match(contents, 8, &PWPC) {
135                Some(Architecture::PowerPC)
136            } else if bytes_offset_match(contents, 8, &M68K) {
137                Some(Architecture::M68k)
138            } else {
139                None
140            }
141        };
142
143        let section_count = u16_from_offset(contents, 32, Ordering::BigEndian).unwrap_or_default();
144        let inst_section_count =
145            u16_from_offset(contents, 34, Ordering::BigEndian).unwrap_or_default();
146
147        let mut sections = Sections::default();
148        for section_index in 0..(section_count + inst_section_count) as usize {
149            // There seems to be an issue after "section_count" number of sections where
150            // the sizes or needed offset value changes, and the incoming values don't
151            // match what one would expect when looking at the binary with a hex editor.
152            let offset_this_section = HEADER_SIZE + section_index * SECTION_HEADER_SIZE;
153            if offset_this_section > contents.len() {
154                break;
155            }
156            let this_section = SectionHeader::from(
157                &contents[offset_this_section..offset_this_section + HEADER_SIZE],
158            );
159
160            let section_name = {
161                let default = format!("Unnamed section {section_index}");
162                if let Some(offset) = this_section.name_offset {
163                    string_from_offset(contents, offset).unwrap_or(default)
164                } else {
165                    default
166                }
167            };
168
169            sections.push(Section {
170                name: section_name,
171                is_executable: this_section.section_kind == 0 || this_section.section_kind == 8,
172                size: this_section.packed_size as usize,
173                offset: this_section.container_offset as usize,
174                virtual_size: 0,
175                virtual_address: 0,
176                data: None,
177                entropy: 0.0,
178            });
179        }
180
181        Ok(Self {
182            arch,
183            ordering: Ordering::BigEndian,
184            os: OperatingSystem::MacOS_Classic,
185            sections: Some(sections),
186            timestamp: u32_from_offset(contents, 16, Ordering::BigEndian).unwrap_or_default(),
187            contents,
188        })
189    }
190
191    /// Compiled timestamp as UTC
192    ///
193    /// # Panics
194    ///
195    /// This code won't panic despite some `.unwrap()` calls.
196    #[must_use]
197    pub fn compiled_date(&self) -> DateTime<Utc> {
198        let janone1940 = DateTime::from_naive_utc_and_offset(
199            NaiveDateTime::parse_from_str("1904-01-01 00:00:00", "%Y-%m-%d %H:%M:%S").unwrap(),
200            Utc,
201        );
202        janone1940 + TimeDelta::try_seconds(i64::from(self.timestamp)).unwrap()
203    }
204}
205
206impl ExecutableFile for Pef<'_> {
207    fn architecture(&self) -> Option<Architecture> {
208        self.arch
209    }
210
211    fn pointer_size(&self) -> usize {
212        32
213    }
214
215    fn operating_system(&self) -> OperatingSystem {
216        self.os
217    }
218
219    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
220        Some(self.compiled_date())
221    }
222
223    #[allow(clippy::cast_possible_truncation)]
224    fn num_sections(&self) -> u32 {
225        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
226    }
227
228    fn sections(&self) -> Option<&Sections<'_>> {
229        self.sections.as_ref()
230    }
231
232    fn import_hash(&self) -> Option<String> {
233        None
234    }
235
236    fn fuzzy_imports(&self) -> Option<String> {
237        None
238    }
239}
240
241impl SpecimenFile for Pef<'_> {
242    const MAGIC: &'static [&'static [u8]] = &[&MAGIC];
243
244    fn type_name(&self) -> &'static str {
245        "PEF"
246    }
247}
248
249impl Display for Pef<'_> {
250    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
251        writeln!(f, "PEF file:")?;
252        writeln!(f, "\tOS: {}", self.os)?;
253        if let Some(arch) = self.arch {
254            writeln!(f, "\tArchitecture: {arch}")?;
255        }
256        writeln!(f, "\tOrdering: {}", self.ordering)?;
257        if let Some(sections) = &self.sections {
258            writeln!(f, "\t{} sections:", sections.len())?;
259            for section in sections {
260                writeln!(f, "\t\t{section}")?;
261            }
262        }
263        writeln!(
264            f,
265            "\tCompiled: {:?}",
266            self.compiled_date().format("%Y-%m-%d %H:%M:%S").to_string()
267        )?;
268        writeln!(f, "\tSize: {}", self.contents.len())?;
269        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
270    }
271}
272
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// The BeOS sample must parse and be identified as a PowerPC binary.
    #[test]
    fn beos() {
        let raw: &[u8] = include_bytes!("../../../testdata/pef/BeApp");

        let pef = Pef::from(raw).unwrap();
        eprintln!("BeOS:\n{pef}");
        assert_eq!(pef.arch, Some(Architecture::PowerPC));
    }

    /// Each Classic Mac OS sample must parse and be identified as a PowerPC binary.
    #[rstest]
    #[case(include_bytes!("../../../testdata/pef/MacOS_1"))]
    #[case(include_bytes!("../../../testdata/pef/MacOS_2"))]
    fn macos(#[case] bytes: &[u8]) {
        let pef = Pef::from(bytes).unwrap();
        eprintln!("Mac OS:\n{pef}");
        assert_eq!(pef.arch, Some(Architecture::PowerPC));
    }
}