Skip to main content

malwaredb_types/exec/pef/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use crate::exec::{Architecture, ExecutableFile, OperatingSystem, Section, Sections};
4use crate::utils::{
5    bytes_offset_match, i32_from_offset, string_from_offset, u16_from_offset, u32_from_offset,
6    EntropyCalc,
7};
8use crate::{Ordering, SpecimenFile};
9
10use std::fmt::{Display, Formatter};
11use std::mem::size_of;
12
13use anyhow::{bail, Result};
14use chrono::{DateTime, NaiveDateTime, TimeDelta, Utc};
15use tracing::instrument;
16use uuid::Uuid;
17
18// Documentation:
19// https://web.archive.org/web/20020219190852/http://developer.apple.com/techpubs/mac/runtimehtml/RTArch-91.html#HEADING=91-0
20
/// Signature expected at offset 0 of a PEF container: the ASCII text `Joy!peff`.
const MAGIC: [u8; 8] = *b"Joy!peff";
/// Architecture tag `pwpc`, compared against offset 8 to detect PowerPC binaries.
const PWPC: [u8; 4] = *b"pwpc";
/// Architecture tag `m68k`, compared against offset 8 to detect Motorola 68k binaries.
const M68K: [u8; 4] = *b"m68k";

/// Number of bytes before the first section header.
const HEADER_SIZE: usize = 40;
/// Distance in bytes from one section header to the next.
const SECTION_HEADER_SIZE: usize = 28;
27
/// The struct for [Preferred Executables](https://en.wikipedia.org/wiki/Preferred_Executable_Format).
///
/// This was the binary format for "Classic" Mac OS, and Be OS on Power PC. Some data is only
/// on the "resource fork", which is not available on modern systems, so we can't parse the
/// entire file. :(
#[derive(Clone, Debug, PartialEq)]
pub struct Pef<'a> {
    /// Instruction set architecture for this binary, or `None` when the architecture tag
    /// in the header is not one this parser recognises
    pub arch: Option<Architecture>,

    /// Byte ordering for this binary
    pub ordering: Ordering,

    /// Operating System for this binary, likely Classic Mac OS
    pub os: OperatingSystem,

    /// Sections of this binary
    pub sections: Option<Sections<'a>>,

    /// Seconds since 1 January 1904
    pub timestamp: u32,

    /// The array containing the raw bytes used to parse this program
    pub contents: &'a [u8],
}
52
/// PEF section header
///
/// Holds the decoded fields of one section header record. Multi-byte values are stored
/// here in native form after being read from the file.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct SectionHeader {
    /// Location of the section name, or `None` if the section is unnamed (the file
    /// format marks an unnamed section with -1)
    pub name_offset: Option<usize>,

    /// Linker's preferred memory address for loading the binary
    pub default_address: u32,

    /// Total section size in memory at run-time
    pub total_size: u32,

    /// Size of the executable code, or data to be initialized at run-time after decompression
    pub unpacked_size: u32,

    /// Size of the section
    pub packed_size: u32,

    /// Location in the file where the section begins
    pub container_offset: u32,

    /// Attributes of the section
    pub section_kind: u8,

    /// Indicates how data might be shared at run-time
    pub share_kind: u8,

    /// Alignment of bytes in memory
    pub alignment: u8,

    /// Reserved, should be zero
    pub reserved: u8,
}
86
87impl AsRef<[u8; size_of::<Self>()]> for SectionHeader {
88    #[allow(clippy::transmute_ptr_to_ptr)]
89    fn as_ref(&self) -> &[u8; size_of::<Self>()] {
90        unsafe { std::mem::transmute::<_, &[u8; size_of::<Self>()]>(self) }
91    }
92}
93
94impl SectionHeader {
95    /// Section header from a sequence of bytes
96    #[must_use]
97    pub fn from(contents: &[u8]) -> Self {
98        Self {
99            name_offset: {
100                let val = i32_from_offset(contents, 0, Ordering::BigEndian).unwrap_or_default();
101                if val > 0 {
102                    #[allow(clippy::cast_sign_loss)]
103                    Some(val as usize)
104                } else {
105                    None
106                }
107            },
108            default_address: u32_from_offset(contents, 4, Ordering::BigEndian).unwrap_or_default(),
109            total_size: u32_from_offset(contents, 8, Ordering::BigEndian).unwrap_or_default(),
110            unpacked_size: u32_from_offset(contents, 12, Ordering::BigEndian).unwrap_or_default(),
111            packed_size: u32_from_offset(contents, 16, Ordering::BigEndian).unwrap_or_default(),
112            container_offset: u32_from_offset(contents, 20, Ordering::BigEndian)
113                .unwrap_or_default(),
114            section_kind: contents[24],
115            share_kind: contents[25],
116            alignment: contents[26],
117            reserved: contents[27],
118        }
119    }
120}
121
122impl<'a> Pef<'a> {
123    /// Parsed PEF from a sequence of bytes
124    ///
125    /// # Errors
126    ///
127    /// Returns an error if parsing fails.
128    #[instrument(name = "PEF parser", skip(contents))]
129    pub fn from(contents: &'a [u8]) -> Result<Self> {
130        if !bytes_offset_match(contents, 0, &MAGIC) {
131            bail!("Not a PEF file");
132        }
133
134        let arch = {
135            if bytes_offset_match(contents, 8, &PWPC) {
136                Some(Architecture::PowerPC)
137            } else if bytes_offset_match(contents, 8, &M68K) {
138                Some(Architecture::M68k)
139            } else {
140                None
141            }
142        };
143
144        let section_count = u16_from_offset(contents, 32, Ordering::BigEndian).unwrap_or_default();
145        let inst_section_count =
146            u16_from_offset(contents, 34, Ordering::BigEndian).unwrap_or_default();
147
148        let mut sections = Sections::default();
149        for section_index in 0..(section_count + inst_section_count) as usize {
150            // There seems to be an issue after "section_count" number of sections where
151            // the sizes or needed offset value changes, and the incoming values don't
152            // match what one would expect when looking at the binary with a hex editor.
153            let offset_this_section = HEADER_SIZE + section_index * SECTION_HEADER_SIZE;
154            if offset_this_section > contents.len() {
155                break;
156            }
157            let this_section = SectionHeader::from(
158                &contents[offset_this_section..offset_this_section + HEADER_SIZE],
159            );
160
161            let section_name = {
162                let default = format!("Unnamed section {section_index}");
163                if let Some(offset) = this_section.name_offset {
164                    string_from_offset(contents, offset).unwrap_or(default)
165                } else {
166                    default
167                }
168            };
169
170            sections.push(Section {
171                name: section_name,
172                is_executable: this_section.section_kind == 0 || this_section.section_kind == 8,
173                size: this_section.packed_size as usize,
174                offset: this_section.container_offset as usize,
175                virtual_size: 0,
176                virtual_address: 0,
177                data: None,
178                entropy: 0.0,
179            });
180        }
181
182        Ok(Self {
183            arch,
184            ordering: Ordering::BigEndian,
185            os: OperatingSystem::MacOS_Classic,
186            sections: Some(sections),
187            timestamp: u32_from_offset(contents, 16, Ordering::BigEndian).unwrap_or_default(),
188            contents,
189        })
190    }
191
192    /// Compiled timestamp as UTC
193    ///
194    /// # Panics
195    ///
196    /// This code won't panic despite some `.unwrap()` calls.
197    #[must_use]
198    pub fn compiled_date(&self) -> DateTime<Utc> {
199        let janone1940 = DateTime::from_naive_utc_and_offset(
200            NaiveDateTime::parse_from_str("1904-01-01 00:00:00", "%Y-%m-%d %H:%M:%S").unwrap(),
201            Utc,
202        );
203        janone1940 + TimeDelta::try_seconds(i64::from(self.timestamp)).unwrap()
204    }
205}
206
207impl ExecutableFile for Pef<'_> {
208    fn architecture(&self) -> Option<Architecture> {
209        self.arch
210    }
211
212    fn pointer_size(&self) -> usize {
213        32
214    }
215
216    fn operating_system(&self) -> OperatingSystem {
217        self.os
218    }
219
220    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
221        Some(self.compiled_date())
222    }
223
224    #[allow(clippy::cast_possible_truncation)]
225    fn num_sections(&self) -> u32 {
226        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
227    }
228
229    fn sections(&self) -> Option<&Sections<'_>> {
230        self.sections.as_ref()
231    }
232
233    fn import_hash(&self) -> Option<Uuid> {
234        None
235    }
236
237    fn fuzzy_imports(&self) -> Option<String> {
238        None
239    }
240}
241
impl SpecimenFile for Pef<'_> {
    /// Signatures that identify this file type: only the module-level `MAGIC` bytes
    /// (`Joy!peff`).
    const MAGIC: &'static [&'static [u8]] = &[&MAGIC];

    /// Short, fixed label for this file format.
    fn type_name(&self) -> &'static str {
        "PEF"
    }
}
249
250impl Display for Pef<'_> {
251    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
252        writeln!(f, "PEF file:")?;
253        writeln!(f, "\tOS: {}", self.os)?;
254        if let Some(arch) = self.arch {
255            writeln!(f, "\tArchitecture: {arch}")?;
256        }
257        writeln!(f, "\tOrdering: {}", self.ordering)?;
258        if let Some(sections) = &self.sections {
259            writeln!(f, "\t{} sections:", sections.len())?;
260            for section in sections {
261                writeln!(f, "\t\t{section}")?;
262            }
263        }
264        writeln!(
265            f,
266            "\tCompiled: {:?}",
267            self.compiled_date().format("%Y-%m-%d %H:%M:%S").to_string()
268        )?;
269        writeln!(f, "\tSize: {}", self.contents.len())?;
270        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
271    }
272}
273
// Smoke tests: each sample must parse as PEF and be detected as PowerPC.
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    // Parses the BeOS sample, prints the summary to stderr, and checks the architecture.
    #[test]
    fn beos() {
        const BYTES: &[u8] = include_bytes!("../../../testdata/pef/BeApp");

        let pef = Pef::from(BYTES).unwrap();
        eprintln!("BeOS:\n{pef}");
        assert_eq!(pef.arch, Some(Architecture::PowerPC));
    }

    // Same check, run once per Classic Mac OS sample via `rstest` cases.
    #[rstest]
    #[case(include_bytes!("../../../testdata/pef/MacOS_1"))]
    #[case(include_bytes!("../../../testdata/pef/MacOS_2"))]
    fn macos(#[case] bytes: &[u8]) {
        let pef = Pef::from(bytes).unwrap();
        eprintln!("Mac OS:\n{pef}");
        assert_eq!(pef.arch, Some(Architecture::PowerPC));
    }
}
296}