// macbinary/lib.rs

1#![cfg_attr(feature = "no_std", no_std)]
2#![deny(missing_docs)]
3
//! MacBinary and resource fork parser
//!
//! ### Specifications:
//!
//! - [MacBinary I](https://web.archive.org/web/20050307030202/http://www.lazerware.com/formats/macbinary/macbinary.html)
//! - [MacBinary II](https://web.archive.org/web/20050305042909/http://www.lazerware.com/formats/macbinary/macbinary_ii.html)
//! - [MacBinary III](https://web.archive.org/web/20050305044255/http://www.lazerware.com/formats/macbinary/macbinary_iii.html)
//!
//! #### Other references:
//!
//! - [Detecting MacBinary format](https://entropymine.wordpress.com/2019/02/13/detecting-macbinary-format/)

// TODO
// - no_std/WASM
// - zero-copy, ttf-parser style

use core::fmt::{self, Display, Formatter};

use crc::{Crc, CRC_16_XMODEM};
#[cfg(feature = "no_std")]
use heapless::String;

use crate::binary::read::{ReadBinary, ReadBinaryDep, ReadCtxt, ReadFrom, ReadScope};
use crate::binary::{NumFrom, U32Be};
use crate::macroman::FromMacRoman;

30pub(crate) mod binary;
31pub(crate) mod error;
32mod macroman;
33pub mod resource;
34#[cfg(test)]
35mod test;
36#[cfg(target_family = "wasm")]
37mod wasm;
38
/// The `mBIN` signature that identifies a MacBinary III header, as a big-endian `u32`.
const MBIN_SIG: u32 = u32::from_be_bytes(*b"mBIN");

41pub use crate::error::ParseError;
42pub use crate::resource::ResourceFork;
43
/// A four-character code
///
/// A 32-bit number that typically holds four 8-bit ASCII characters, used for type and creator
/// codes, and resource types. E.g. `mBIN` `SIZE` `ICON` `APPL`.
///
/// The first character occupies the most significant byte of the wrapped `u32`.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct FourCC(pub u32);

51/// A parsed MacBinary file containing metadata, data fork (if present), and resource fork (if present)
52pub struct MacBinary<'a> {
53    version: Version,
54    header: Header<'a>,
55    data_fork: &'a [u8],
56    rsrc_fork: &'a [u8],
57}
58
59/// MacBinary header
60#[allow(unused)]
61struct Header<'a> {
62    filename: &'a [u8],
63    secondary_header_len: u16,
64    data_fork_len: u32,
65    rsrc_fork_len: u32,
66    file_type: FourCC,
67    file_creator: FourCC,
68    finder_flags: u8,
69    vpos: u16,
70    hpos: u16,
71    window_or_folder_id: u16,
72    protected: bool,
73    created: u32,
74    modified: u32,
75    comment_len: u16,
76    finder_flags2: u8,
77    signature: FourCC,
78    /// Script of file name (from the `fdScript` field of an `fxInfo` record). since: MacBinary III
79    ///
80    /// > The script system for displaying the file’s name. Ordinarily, the
81    /// > Finder (and the Standard File Package) displays the names of all
82    /// > desktop objects in the system script, which depends on the
83    /// > region-specific configuration of the system. The high bit of the byte
84    /// > in the `fdScript` field is set by default to 0, which causes the Finder
85    /// > to display the filename in the current system script. If the high bit is
86    /// > set to 1, the Finder (and the Standard File Package) displays the
87    /// > filename and directory name in the script whose code is recorded in
88    /// > the remaining 7 bits.
89    ///
90    /// https://developer.apple.com/library/archive/documentation/mac/pdf/MacintoshToolboxEssentials.pdf
91    script: u8,
92    extended_finder_flags: u8,
93    version: u8,
94    min_version: u8,
95    crc: u16,
96}
97
/// MacBinary version.
///
/// Variants compare in release order, so `Version::I < Version::II < Version::III`.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
pub enum Version {
    /// MacBinary I
    I = 1,
    /// MacBinary II
    II = 2,
    /// MacBinary III
    III = 3,
}

109/// Determine if the supplied data looks like MacBinary data.
110pub fn detect(data: &[u8]) -> Option<Version> {
111    // All MacBinary files start with a 128-byte header and the first byte is zero
112    (data.len() >= 128 && data[0] == 0).then_some(())?;
113
114    // To determine if a header is a valid MacBinary header, first take advantage of the new MacBinary III signature located at offset 102
115    if ReadScope::new(&data[102..][..4]).read::<FourCC>() == Ok(FourCC(MBIN_SIG)) {
116        return Some(Version::III);
117    }
118
119    // If it is not a MacBinary III header, start by checking bytes 0 and 74 - they should both be zero. If they are both zero, either (a) the CRC should match, which means it is a MacBinary II file, or (b) byte 82 is zero, which means it may be a MacBinary I file.
120    if data[74] != 0 || data[82] != 0 {
121        return None;
122    }
123
124    let crc = u16::from_be_bytes(data[124..][..2].try_into().unwrap());
125    if crc == calc_crc(&data[..124]) {
126        return Some(Version::II);
127    }
128
129    // Check for MacBinary I
130    // Offsets 101-125, Byte, should all be 0.
131    // Offset 2, Byte, (the length of the file name) should be in the range of 1-63.
132    //   Note: It says Offset 2 but the length of the file name is at offset 1
133    // Offsets 83 and 87, Long Word, (the length of the forks) should be in the range of 0-$007F FFFF.
134    let data_fork_len = u32::from_be_bytes(data[83..][..4].try_into().unwrap());
135    let rsrc_fork_len = u32::from_be_bytes(data[87..][..4].try_into().unwrap());
136    let macbinary1 = data[101..=125].iter().all(|byte| *byte == 0)
137        && (1..=63).contains(&data[1])
138        && data_fork_len <= 0x007F_FFFF
139        && rsrc_fork_len <= 0x007F_FFFF;
140
141    if macbinary1 {
142        Some(Version::I)
143    } else {
144        None
145    }
146}
147
148/// Parse a MacBinary encoded file.
149pub fn parse(data: &[u8]) -> Result<MacBinary<'_>, ParseError> {
150    let Some(version) = detect(data) else {
151        return Err(ParseError::BadVersion) // FIXME: Better error type
152    };
153    ReadScope::new(data).read_dep::<MacBinary<'_>>(version)
154}
155
156impl ReadBinary for Header<'_> {
157    type HostType<'a> = Header<'a>;
158
159    fn read<'a>(ctxt: &mut ReadCtxt<'a>) -> Result<Self::HostType<'a>, ParseError> {
160        // old version number, must be kept at zero for compatibility
161        let _ = ctxt.read_u8()?;
162        // Length of filename (must be in the range 1-31)
163        let filename_len = ctxt.read_u8()?;
164        ctxt.check((1..=31).contains(&filename_len))?; // TODO: 1-63?
165                                                       // filename (only "length" bytes are significant).
166        let filename_data = ctxt.read_slice(63)?;
167        // file type (normally expressed as four characters)
168        let file_type = ctxt.read::<FourCC>()?;
169        // file creator (normally expressed as four characters)
170        let file_creator = ctxt.read::<FourCC>()?;
171        // original Finder flags Bit 7 - isAlias. Bit 6 - isInvisible. Bit 5 - hasBundle. Bit 4 - nameLocked. Bit 3 - isStationery. Bit 2 - hasCustomIcon. Bit 1 - reserved. Bit 0 - hasBeenInited.
172        let finder_flags = ctxt.read_u8()?;
173        // zero fill, must be zero for compatibility
174        let _ = ctxt.read_u8()?;
175        // file's vertical position within its window.
176        let vpos = ctxt.read_u16be()?;
177        // file's horizontal position within its window.
178        let hpos = ctxt.read_u16be()?;
179        // file's window or folder ID.
180        let window_or_folder_id = ctxt.read_u16be()?;
181        // "Protected" flag (in low order bit).
182        let protected = ctxt.read_u8()?;
183        // zero fill, must be zero for compatibility
184        let _ = ctxt.read_u8()?;
185        // Data Fork length (bytes, zero if no Data Fork).
186        let data_fork_len = ctxt.read_u32be()?;
187        // Resource Fork length (bytes, zero if no R.F.).
188        let rsrc_fork_len = ctxt.read_u32be()?;
189        // File's creation date
190        let created = ctxt.read_u32be()?;
191        // File's "last modified" date.
192        let modified = ctxt.read_u32be()?;
193        // length of Get Info comment to be sent after the resource fork (if implemented, see below).
194        let comment_len = ctxt.read_u16be()?;
195        // Finder Flags, bits 0-7. (Bits 8-15 are already in byte 73) Bit 7 - hasNoInits Bit 6 - isShared Bit 5 - requiresSwitchLaunch Bit 4 - ColorReserved Bits 1-3 - color Bit 0 - isOnDesk
196        let finder_flags2 = ctxt.read_u8()?;
197        // signature for identification purposes ('mBIN')
198        let signature = ctxt.read::<FourCC>()?;
199        // script of file name (from the fdScript field of an fxInfo record)
200        let script = ctxt.read_u8()?;
201        // extended Finder flags (from the fdXFlags field of an fxInfo record)
202        let extended_finder_flags = ctxt.read_u8()?;
203        // Bytes 108-115 unused (must be zeroed by creators, must be ignored by readers)
204        let _ = ctxt.read_slice(8)?;
205        // Length of total files when packed files are unpacked. As of the writing of this document, this field has never been used.
206        let _ = ctxt.read_u32be()?;
207        // Length of a secondary header. If this is non-zero, skip this many bytes (rounded up to the next multiple of 128). This is for future expansion only, when sending files with MacBinary, this word should be zero.
208        let secondary_header_len = ctxt.read_u16be()?;
209        // Version number of MacBinary III that the uploading program is written for (the version is 130 for MacBinary III)
210        let version = ctxt.read_u8()?;
211        // Minimum MacBinary version needed to read this file (set this value at 129 for backwards compatibility with MacBinary II)
212        // field: u8,
213        let min_version = ctxt.read_u8()?;
214        // CRC of previous 124 bytes
215        let crc = ctxt.read_u16be()?;
216        // Reserved for computer type and OS ID (this field will be zero for the current Macintosh).
217        let _ = ctxt.read_u16be()?;
218
219        Ok(Header {
220            filename: &filename_data[..usize::from(filename_len)],
221            file_type,
222            file_creator,
223            finder_flags,
224            vpos,
225            hpos,
226            window_or_folder_id,
227            protected: protected != 0,
228            data_fork_len,
229            rsrc_fork_len,
230            created,
231            modified,
232            comment_len,
233            finder_flags2,
234            signature,
235            script,
236            extended_finder_flags,
237            secondary_header_len,
238            version,
239            min_version,
240            crc,
241        })
242    }
243}
244
245impl ReadBinaryDep for MacBinary<'_> {
246    type Args<'a> = Version;
247    type HostType<'a> = MacBinary<'a>;
248
249    fn read_dep<'a>(
250        ctxt: &mut ReadCtxt<'a>,
251        version: Version,
252    ) -> Result<Self::HostType<'a>, ParseError> {
253        let crc_data = ctxt.scope().data().get(..124).ok_or(ParseError::BadEof)?;
254
255        // The binary format consists of a 128-byte header containing all the information necessary
256        // to reproduce the document's directory entry on the receiving Macintosh; followed by the
257        // document's Data Fork (if it has one), padded with nulls to a multiple of 128 bytes (if
258        // necessary); followed by the document's Resource Fork (again, padded if necessary). The
259        // lengths of these forks (either or both of which may be zero) are contained in the
260        // header.
261        let header = ctxt.read::<Header<'_>>()?;
262
263        // Check the CRC
264        let crc = calc_crc(crc_data);
265        if version >= Version::II && crc != header.crc {
266            return Err(ParseError::CrcMismatch);
267        }
268
269        // Skip secondary header if present, rounding up to next multiple of 128
270        let _ = ctxt.read_slice(usize::from(next_u16_multiple_of_128(
271            header.secondary_header_len,
272        )?))?;
273
274        // Read the data fork
275        let data_fork = ctxt.read_slice(usize::num_from(header.data_fork_len))?;
276
277        // Skip padding
278        let padding = next_u32_multiple_of_128(header.data_fork_len)? - header.data_fork_len;
279        let _ = ctxt.read_slice(usize::num_from(padding))?;
280
281        // Read the resource fork
282        let rsrc_fork = ctxt.read_slice(usize::num_from(header.rsrc_fork_len))?;
283
284        Ok(MacBinary {
285            version,
286            header,
287            data_fork,
288            rsrc_fork,
289        })
290    }
291}
292
293impl MacBinary<'_> {
294    /// Returns the version of this MacBinary file.
295    pub fn version(&self) -> Version {
296        self.version
297    }
298
299    /// The file name of the file encoded in this MacBinary file.
300    #[cfg(not(feature = "no_std"))]
301    pub fn filename(&self) -> String {
302        // For the purposes of this library we consider the system script to be Mac Roman.
303        // The script field can indicate a different script if the high-bit is set though.
304        // If the high-bit is set but the remaining 7-bits are zero that means it's still
305        // MacRoman.
306        // if self.header.script & 0x80 == 0x80 && self.header.script & !0x80 != 0 {
307        //     todo!("Handle non-macroman script")
308        // } else {
309        //     String::from_macroman(self.header.filename)
310        // }
311        // TODO Handle non-macroman script
312        String::from_macroman(self.header.filename)
313    }
314
315    /// The file name of the file encoded in this MacBinary file.
316    ///
317    /// The raw name can't be longer than 63 bytes in length. However,
318    /// this method converts the raw bytes from MacRoman into UTF-8 string and many non-ASCII
319    /// MacRoman bytes encode to more than one byte in UTF-8. This method will return `None` if
320    /// the `N` parameter is too small to hold the UTF-8 string.
321    #[cfg(feature = "no_std")]
322    pub fn filename<const N: usize>(&self) -> Option<String<N>> {
323        // TODO: Handle non-macroman script
324        String::try_from_macroman(self.header.filename)
325    }
326
327    /// The raw filename bytes
328    pub fn filename_bytes(&self) -> &[u8] {
329        self.header.filename
330    }
331
332    /// The file's creator code
333    pub fn file_creator(&self) -> FourCC {
334        self.header.file_creator
335    }
336
337    /// The file's type code
338    pub fn file_type(&self) -> FourCC {
339        self.header.file_type
340    }
341
342    /// File creation date (UNIX timestamp)
343    pub fn created(&self) -> u32 {
344        mactime(self.header.created)
345    }
346
347    /// File last modified date (UNIX timestamp)
348    pub fn modified(&self) -> u32 {
349        mactime(self.header.modified)
350    }
351
352    /// Data fork data
353    pub fn data_fork(&self) -> &[u8] {
354        self.data_fork
355    }
356
357    /// Resource fork data
358    pub fn resource_fork_raw(&self) -> &[u8] {
359        self.rsrc_fork
360    }
361
362    /// Parsed resource fork
363    ///
364    /// Note: Not all files have resource fork data. This method will return None if the resource
365    /// fork is empty.
366    pub fn resource_fork(&self) -> Result<Option<ResourceFork<'_>>, ParseError> {
367        if self.rsrc_fork.is_empty() {
368            return Ok(None);
369        }
370
371        ResourceFork::new(self.rsrc_fork).map(Some)
372    }
373}
374
375impl ReadFrom for FourCC {
376    type ReadType = U32Be;
377
378    fn from(value: u32) -> Self {
379        FourCC(value)
380    }
381}
382
383impl Display for FourCC {
384    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
385        let tag = self.0;
386        let bytes = tag.to_be_bytes();
387        if bytes.iter().all(|c| c.is_ascii() && !c.is_ascii_control()) {
388            let s = core::str::from_utf8(&bytes).unwrap(); // unwrap safe due to above check
389            s.fmt(f)
390        } else {
391            write!(f, "0x{:08x}", tag)
392        }
393    }
394}
395
396impl fmt::Debug for FourCC {
397    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
398        write!(f, "'{}'", self)
399    }
400}
401
402fn next_u16_multiple_of_128(value: u16) -> Result<u16, ParseError> {
403    let rem = value % 128;
404    if rem == 0 {
405        Ok(value)
406    } else {
407        value.checked_add(128 - rem).ok_or(ParseError::Overflow)
408    }
409}
410
411fn next_u32_multiple_of_128(value: u32) -> Result<u32, ParseError> {
412    let rem = value % 128;
413    if rem == 0 {
414        Ok(value)
415    } else {
416        value.checked_add(128 - rem).ok_or(ParseError::Overflow)
417    }
418}
419
/// Convert Mac OS timestamp to UNIX timestamp
///
/// The Mac OS epoch is 1 January 1904, UNIX epoch is 1 Jan 1970.
///
/// The subtraction wraps: a timestamp earlier than the UNIX epoch cannot be represented in the
/// unsigned result and comes back as a large value instead.
fn mactime(timestamp: u32) -> u32 {
    const SECONDS_PER_DAY: u32 = 86_400;
    // 66 years from 1904 to 1970, 17 of them leap years
    const OFFSET: u32 = (66 * 365 + 17) * SECONDS_PER_DAY;
    timestamp.wrapping_sub(OFFSET)
}

429fn calc_crc(data: &[u8]) -> u16 {
430    let crc: Crc<u16> = Crc::<u16>::new(&CRC_16_XMODEM);
431    crc.checksum(data)
432}
433
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test::read_fixture;

    #[test]
    fn test_next_multiple() {
        assert_eq!(next_u16_multiple_of_128(0), Ok(0));
        assert_eq!(next_u16_multiple_of_128(3), Ok(128));
        assert_eq!(next_u16_multiple_of_128(128), Ok(128));
        assert_eq!(next_u16_multiple_of_128(129), Ok(256));

        assert_eq!(next_u32_multiple_of_128(0), Ok(0));
        assert_eq!(next_u32_multiple_of_128(3), Ok(128));
        assert_eq!(next_u32_multiple_of_128(128), Ok(128));
        assert_eq!(next_u32_multiple_of_128(129), Ok(256));
    }

    #[test]
    fn test_next_multiple_overflow() {
        assert_eq!(
            next_u16_multiple_of_128(u16::MAX - 3),
            Err(ParseError::Overflow)
        );
        assert_eq!(
            next_u32_multiple_of_128(u32::MAX - 3),
            Err(ParseError::Overflow)
        );
    }

    // Shared assertions for the "Text File" fixture, which exists once per MacBinary version.
    fn check_text_file(file: &MacBinary<'_>, version: Version) {
        assert_eq!(file.version(), version);
        assert_eq!(file.filename(), "Text File");
        assert_eq!(file.file_type(), FourCC(u32::from_be_bytes(*b"TEXT")));
        assert_eq!(file.file_creator(), FourCC(u32::from_be_bytes(*b"R*ch"))); // BBEdit
        assert_eq!(file.data_fork(), b"This is a test file.\r");
        assert_eq!(file.resource_fork_raw().len(), 1454);
    }

    #[test]
    fn test_macbinary_1() {
        let data = read_fixture("tests/Text File I.Bin");
        let file = parse(&data).unwrap();

        check_text_file(&file, Version::I);
    }

    #[test]
    fn test_macbinary_2() {
        let data = read_fixture("tests/Text File II.bin");
        let file = parse(&data).unwrap();

        check_text_file(&file, Version::II);
    }

    #[test]
    fn test_macbinary_3() {
        let data = read_fixture("tests/Text File.bin");
        let file = parse(&data).unwrap();

        check_text_file(&file, Version::III);
    }

    #[test]
    fn test_no_resource_fork() {
        let data = read_fixture("tests/No resource fork.txt.bin");
        let file = parse(&data).unwrap();

        assert_eq!(file.version(), Version::III);
        assert!(file.resource_fork().unwrap().is_none());
    }

    #[test]
    fn test_dates() {
        let data = read_fixture("tests/Date Test.bin");
        let file = parse(&data).unwrap();

        assert_eq!(file.version(), Version::III);
        assert_eq!(file.filename(), "Date Test");
        assert_eq!(file.file_type(), FourCC(u32::from_be_bytes(*b"TEXT")));
        assert_eq!(file.file_creator(), FourCC(u32::from_be_bytes(*b"MPS "))); // MPW Shell
        assert_eq!(file.data_fork(), b"Sunday, 26 March 2023 10:00:52 AM\r");
        assert_eq!(file.created(), 1679824852);
        assert_eq!(file.modified(), 1679824852);
    }
}