macbinary 0.2.1

MacBinary and resource fork parser
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
#![cfg_attr(feature = "no_std", no_std)]
#![deny(missing_docs)]

//! MacBinary and resource fork parser
//!
//! ### Specifications:
//!
//! - [MacBinary I](https://web.archive.org/web/20050307030202/http://www.lazerware.com/formats/macbinary/macbinary.html)
//! - [MacBinary II](https://web.archive.org/web/20050305042909/http://www.lazerware.com/formats/macbinary/macbinary_ii.html)
//! - [MacBinary III](https://web.archive.org/web/20050305044255/http://www.lazerware.com/formats/macbinary/macbinary_iii.html)
//!
//! #### Other references:
//!
//! - [Detecting MacBinary format](https://entropymine.wordpress.com/2019/02/13/detecting-macbinary-format/)

// TODO
// - no_std/WASM
// - zero-copy, ttf-parser style

use core::fmt::{self, Display, Formatter};

use crc::{Crc, CRC_16_XMODEM};
#[cfg(feature = "no_std")]
use heapless::String;

use crate::binary::read::{ReadBinary, ReadBinaryDep, ReadCtxt, ReadFrom, ReadScope};
use crate::binary::{NumFrom, U32Be};
use crate::macroman::FromMacRoman;

pub(crate) mod binary;
pub(crate) mod error;
mod macroman;
pub mod resource;
#[cfg(test)]
mod test;
#[cfg(target_family = "wasm")]
mod wasm;

const MBIN_SIG: u32 = u32::from_be_bytes(*b"mBIN");

pub use crate::error::ParseError;
pub use crate::resource::ResourceFork;

/// A four-character code
///
/// A 32-bit number that typically holds 4 8-bit ASCII characters, used for type and creator
/// codes, and resource types. E.g. `mBIN`, `SIZE`, `ICON`, `APPL`.
///
/// The characters are packed in big-endian order: the first character occupies the most
/// significant byte of the wrapped `u32`, so `FourCC(u32::from_be_bytes(*b"TEXT"))` is the
/// code `TEXT`.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct FourCC(pub u32);

/// A parsed MacBinary file containing metadata, data fork (if present), and resource fork (if present)
///
/// All slices borrow from the buffer that was handed to [`parse`]; nothing is copied.
pub struct MacBinary<'a> {
    /// MacBinary version the file was detected as.
    version: Version,
    /// Decoded 128-byte header.
    header: Header<'a>,
    /// Raw data fork bytes (empty when the header's data fork length is zero).
    data_fork: &'a [u8],
    /// Raw resource fork bytes (empty when the header's resource fork length is zero).
    rsrc_fork: &'a [u8],
}

/// MacBinary header
///
/// Holds the fields decoded from the 128-byte header at the start of a MacBinary file.
// Several fields are decoded for completeness but are not read by any accessor in this file,
// hence the `unused` allowance.
#[allow(unused)]
struct Header<'a> {
    /// File name bytes, trimmed to the length stored in the header.
    filename: &'a [u8],
    /// Length of the secondary header that follows the main header (zero if absent).
    secondary_header_len: u16,
    /// Data fork length in bytes (zero if there is no data fork).
    data_fork_len: u32,
    /// Resource fork length in bytes (zero if there is no resource fork).
    rsrc_fork_len: u32,
    /// File type (normally expressed as four characters).
    file_type: FourCC,
    /// File creator (normally expressed as four characters).
    file_creator: FourCC,
    /// Original Finder flags (the byte read directly after the creator code).
    finder_flags: u8,
    /// File's vertical position within its window.
    vpos: u16,
    /// File's horizontal position within its window.
    hpos: u16,
    /// File's window or folder ID.
    window_or_folder_id: u16,
    /// "Protected" flag; `true` when the protected byte in the header is non-zero.
    protected: bool,
    /// File's creation date as stored in the header (Mac OS epoch, not yet converted).
    created: u32,
    /// File's "last modified" date as stored in the header (Mac OS epoch, not yet converted).
    modified: u32,
    /// Length of the Get Info comment sent after the resource fork.
    comment_len: u16,
    /// Second byte of Finder flags (bits 0-7).
    finder_flags2: u8,
    /// Signature for identification purposes (`mBIN` in MacBinary III).
    signature: FourCC,
    /// Script of file name (from the `fdScript` field of an `fxInfo` record). since: MacBinary III
    ///
    /// > The script system for displaying the file’s name. Ordinarily, the
    /// > Finder (and the Standard File Package) displays the names of all
    /// > desktop objects in the system script, which depends on the
    /// > region-specific configuration of the system. The high bit of the byte
    /// > in the `fdScript` field is set by default to 0, which causes the Finder
    /// > to display the filename in the current system script. If the high bit is
    /// > set to 1, the Finder (and the Standard File Package) displays the
    /// > filename and directory name in the script whose code is recorded in
    /// > the remaining 7 bits.
    ///
    /// https://developer.apple.com/library/archive/documentation/mac/pdf/MacintoshToolboxEssentials.pdf
    script: u8,
    /// Extended Finder flags (from the `fdXFlags` field of an `fxInfo` record).
    extended_finder_flags: u8,
    /// Version number of MacBinary that the uploading program was written for.
    version: u8,
    /// Minimum MacBinary version needed to read this file.
    min_version: u8,
    /// CRC of the first 124 header bytes, as stored in the header.
    crc: u16,
}

/// MacBinary version.
///
/// Variants are ordered oldest to newest, so versions can be compared directly
/// (the parser uses `version >= Version::II` to decide whether to verify the header CRC).
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
pub enum Version {
    /// MacBinary I
    I = 1,
    /// MacBinary II
    II = 2,
    /// MacBinary III
    III = 3,
}

/// Determine if the supplied data looks like MacBinary data.
///
/// Returns the [`Version`] whose checks `data` satisfies, or `None` when `data` is shorter
/// than the 128-byte header or matches none of the checks. Only the header is inspected; the
/// forks are not validated here.
pub fn detect(data: &[u8]) -> Option<Version> {
    // Every MacBinary file opens with a 128-byte header whose first byte is zero.
    if data.len() < 128 || data[0] != 0 {
        return None;
    }

    // Big-endian long word starting at `offset`. All offsets used below lie inside the
    // 128-byte header, which the length check above guarantees is present.
    let long_at = |offset: usize| {
        u32::from_be_bytes([
            data[offset],
            data[offset + 1],
            data[offset + 2],
            data[offset + 3],
        ])
    };

    // MacBinary III is recognised purely by its 'mBIN' signature at offset 102.
    let signature = ReadScope::new(&data[102..106]).read::<FourCC>();
    if signature == Ok(FourCC(MBIN_SIG)) {
        return Some(Version::III);
    }

    // For the older versions the zero-fill bytes at offsets 74 and 82 must both be clear.
    // Note that byte 82 is required to be zero for MacBinary II as well as MacBinary I.
    if data[74] != 0 || data[82] != 0 {
        return None;
    }

    // MacBinary II: the CRC stored at offset 124 covers the preceding 124 bytes.
    let stored_crc = u16::from_be_bytes([data[124], data[125]]);
    if stored_crc == calc_crc(&data[..124]) {
        return Some(Version::II);
    }

    // MacBinary I has no signature or CRC, so fall back to plausibility checks:
    // - Offsets 101-125 should all be zero.
    // - The file name length (offset 1; the spec text says offset 2) should be 1-63.
    // - The fork lengths at offsets 83 and 87 should be in the range 0-$007F FFFF.
    const MAX_FORK_LEN: u32 = 0x007F_FFFF;
    let tail_is_zero = data[101..=125].iter().all(|&byte| byte == 0);
    let name_len_ok = matches!(data[1], 1..=63);
    let forks_fit = long_at(83) <= MAX_FORK_LEN && long_at(87) <= MAX_FORK_LEN;

    (tail_is_zero && name_len_ok && forks_fit).then_some(Version::I)
}

/// Parse a MacBinary encoded file.
///
/// The version is determined with [`detect`] first; data that is not recognised as any
/// MacBinary version is rejected with [`ParseError::BadVersion`]. Otherwise the header and
/// forks are decoded and any parse error is returned as-is.
pub fn parse(data: &[u8]) -> Result<MacBinary<'_>, ParseError> {
    match detect(data) {
        Some(version) => ReadScope::new(data).read_dep::<MacBinary<'_>>(version),
        // FIXME: Better error type
        None => Err(ParseError::BadVersion),
    }
}

impl ReadBinary for Header<'_> {
    type HostType<'a> = Header<'a>;

    /// Decode the 128-byte MacBinary header from the current position of `ctxt`.
    ///
    /// Fields are read strictly in on-disk order; the byte offsets noted below follow from
    /// the sizes of the preceding reads. Fails if the file name length is outside 1-31 or if
    /// any read runs past the end of the data.
    fn read<'a>(ctxt: &mut ReadCtxt<'a>) -> Result<Self::HostType<'a>, ParseError> {
        // Byte 0: old version number, must be kept at zero for compatibility
        let _ = ctxt.read_u8()?;
        // Byte 1: length of filename (must be in the range 1-31)
        let filename_len = ctxt.read_u8()?;
        ctxt.check((1..=31).contains(&filename_len))?; // TODO: 1-63?
        // Bytes 2-64: filename (only "length" bytes are significant).
        let filename_data = ctxt.read_slice(63)?;
        // Bytes 65-68: file type (normally expressed as four characters)
        let file_type = ctxt.read::<FourCC>()?;
        // Bytes 69-72: file creator (normally expressed as four characters)
        let file_creator = ctxt.read::<FourCC>()?;
        // Byte 73: original Finder flags
        //   Bit 7 - isAlias. Bit 6 - isInvisible. Bit 5 - hasBundle. Bit 4 - nameLocked.
        //   Bit 3 - isStationery. Bit 2 - hasCustomIcon. Bit 1 - reserved. Bit 0 - hasBeenInited.
        let finder_flags = ctxt.read_u8()?;
        // Byte 74: zero fill, must be zero for compatibility
        let _ = ctxt.read_u8()?;
        // Bytes 75-76: file's vertical position within its window.
        let vpos = ctxt.read_u16be()?;
        // Bytes 77-78: file's horizontal position within its window.
        let hpos = ctxt.read_u16be()?;
        // Bytes 79-80: file's window or folder ID.
        let window_or_folder_id = ctxt.read_u16be()?;
        // Byte 81: "Protected" flag (in low order bit).
        let protected = ctxt.read_u8()?;
        // Byte 82: zero fill, must be zero for compatibility
        let _ = ctxt.read_u8()?;
        // Bytes 83-86: Data Fork length (bytes, zero if no Data Fork).
        let data_fork_len = ctxt.read_u32be()?;
        // Bytes 87-90: Resource Fork length (bytes, zero if no R.F.).
        let rsrc_fork_len = ctxt.read_u32be()?;
        // Bytes 91-94: File's creation date
        let created = ctxt.read_u32be()?;
        // Bytes 95-98: File's "last modified" date.
        let modified = ctxt.read_u32be()?;
        // Bytes 99-100: length of Get Info comment to be sent after the resource fork
        // (if implemented).
        let comment_len = ctxt.read_u16be()?;
        // Byte 101: Finder Flags, bits 0-7. (Bits 8-15 are already in byte 73)
        //   Bit 7 - hasNoInits. Bit 6 - isShared. Bit 5 - requiresSwitchLaunch.
        //   Bit 4 - ColorReserved. Bits 1-3 - color. Bit 0 - isOnDesk.
        let finder_flags2 = ctxt.read_u8()?;
        // Bytes 102-105: signature for identification purposes ('mBIN')
        let signature = ctxt.read::<FourCC>()?;
        // Byte 106: script of file name (from the fdScript field of an fxInfo record)
        let script = ctxt.read_u8()?;
        // Byte 107: extended Finder flags (from the fdXFlags field of an fxInfo record)
        let extended_finder_flags = ctxt.read_u8()?;
        // Bytes 108-115 unused (must be zeroed by creators, must be ignored by readers)
        let _ = ctxt.read_slice(8)?;
        // Bytes 116-119: Length of total files when packed files are unpacked. As of the
        // writing of the specification, this field has never been used.
        let _ = ctxt.read_u32be()?;
        // Bytes 120-121: Length of a secondary header. If this is non-zero, skip this many
        // bytes (rounded up to the next multiple of 128). This is for future expansion only,
        // when sending files with MacBinary, this word should be zero.
        let secondary_header_len = ctxt.read_u16be()?;
        // Byte 122: Version number of MacBinary III that the uploading program is written for
        // (the version is 130 for MacBinary III)
        let version = ctxt.read_u8()?;
        // Byte 123: Minimum MacBinary version needed to read this file (set this value at 129
        // for backwards compatibility with MacBinary II)
        let min_version = ctxt.read_u8()?;
        // Bytes 124-125: CRC of previous 124 bytes
        let crc = ctxt.read_u16be()?;
        // Bytes 126-127: Reserved for computer type and OS ID (this field will be zero for the
        // current Macintosh).
        let _ = ctxt.read_u16be()?;

        Ok(Header {
            // Only the first `filename_len` bytes of the 63-byte name field are meaningful.
            filename: &filename_data[..usize::from(filename_len)],
            file_type,
            file_creator,
            finder_flags,
            vpos,
            hpos,
            window_or_folder_id,
            // Any non-zero byte is treated as "protected".
            protected: protected != 0,
            data_fork_len,
            rsrc_fork_len,
            created,
            modified,
            comment_len,
            finder_flags2,
            signature,
            script,
            extended_finder_flags,
            secondary_header_len,
            version,
            min_version,
            crc,
        })
    }
}

impl ReadBinaryDep for MacBinary<'_> {
    type Args<'a> = Version;
    type HostType<'a> = MacBinary<'a>;

    /// Decode a whole MacBinary file: header, optional secondary header, data fork,
    /// padding, and resource fork, in that order.
    ///
    /// `version` is the version the caller detected; the header CRC is only enforced for
    /// MacBinary II and later.
    fn read_dep<'a>(
        ctxt: &mut ReadCtxt<'a>,
        version: Version,
    ) -> Result<Self::HostType<'a>, ParseError> {
        // Grab the bytes covered by the header CRC before the header read advances the context.
        let checksummed = ctxt.scope().data().get(..124).ok_or(ParseError::BadEof)?;

        // Layout: a 128-byte header describing the file, then the Data Fork padded with nulls
        // to a multiple of 128 bytes, then the Resource Fork. Either fork may be empty; their
        // lengths come from the header.
        let header = ctxt.read::<Header<'_>>()?;

        // MacBinary I headers carry no CRC, so only verify it from version II onwards.
        if version >= Version::II && calc_crc(checksummed) != header.crc {
            return Err(ParseError::CrcMismatch);
        }

        // A secondary header, if any, occupies its length rounded up to a multiple of 128.
        let secondary_len = next_u16_multiple_of_128(header.secondary_header_len)?;
        let _ = ctxt.read_slice(usize::from(secondary_len))?;

        // Data fork, followed by the null padding up to the next 128-byte boundary.
        let data_fork = ctxt.read_slice(usize::num_from(header.data_fork_len))?;
        let padded_len = next_u32_multiple_of_128(header.data_fork_len)?;
        let _ = ctxt.read_slice(usize::num_from(padded_len - header.data_fork_len))?;

        // Resource fork.
        let rsrc_fork = ctxt.read_slice(usize::num_from(header.rsrc_fork_len))?;

        Ok(MacBinary {
            version,
            header,
            data_fork,
            rsrc_fork,
        })
    }
}

impl MacBinary<'_> {
    /// Returns the version of this MacBinary file.
    pub fn version(&self) -> Version {
        self.version
    }

    /// The file name of the file encoded in this MacBinary file.
    ///
    /// The raw name is always decoded as Mac Roman.
    #[cfg(not(feature = "no_std"))]
    pub fn filename(&self) -> String {
        // For the purposes of this library the system script is taken to be Mac Roman.
        // The header's script field can select a different script when its high bit is set
        // (with the remaining 7 bits non-zero); that case is not handled yet.
        // TODO Handle non-macroman script
        String::from_macroman(self.filename_bytes())
    }

    /// The file name of the file encoded in this MacBinary file.
    ///
    /// The raw name can't be longer than 63 bytes in length. However,
    /// this method converts the raw bytes from MacRoman into UTF-8 string and many non-ASCII
    /// MacRoman bytes encode to more than one byte in UTF-8. This method will return `None` if
    /// the `N` parameter is too small to hold the UTF-8 string.
    #[cfg(feature = "no_std")]
    pub fn filename<const N: usize>(&self) -> Option<String<N>> {
        // TODO: Handle non-macroman script
        String::try_from_macroman(self.filename_bytes())
    }

    /// The raw filename bytes
    pub fn filename_bytes(&self) -> &[u8] {
        self.header.filename
    }

    /// The file's creator code
    pub fn file_creator(&self) -> FourCC {
        self.header.file_creator
    }

    /// The file's type code
    pub fn file_type(&self) -> FourCC {
        self.header.file_type
    }

    /// File creation date (UNIX timestamp)
    pub fn created(&self) -> u32 {
        let raw = self.header.created;
        mactime(raw)
    }

    /// File last modified date (UNIX timestamp)
    pub fn modified(&self) -> u32 {
        let raw = self.header.modified;
        mactime(raw)
    }

    /// Data fork data
    pub fn data_fork(&self) -> &[u8] {
        self.data_fork
    }

    /// Resource fork data
    pub fn resource_fork_raw(&self) -> &[u8] {
        self.rsrc_fork
    }

    /// Parsed resource fork
    ///
    /// Note: Not all files have resource fork data. This method will return `Ok(None)` if the
    /// resource fork is empty, and an error if a non-empty resource fork fails to parse.
    pub fn resource_fork(&self) -> Result<Option<ResourceFork<'_>>, ParseError> {
        match self.rsrc_fork {
            [] => Ok(None),
            raw => ResourceFork::new(raw).map(Some),
        }
    }
}

impl ReadFrom for FourCC {
    /// A four-character code is stored as a big-endian 32-bit value.
    type ReadType = U32Be;

    /// Wrap the decoded 32-bit value.
    fn from(code: u32) -> Self {
        Self(code)
    }
}

impl Display for FourCC {
    /// Writes the code as its four characters when every byte is printable ASCII
    /// (space through `~`), and as a zero-padded hexadecimal number otherwise.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let raw = self.0.to_be_bytes();
        // Printable ASCII is exactly the ASCII range without the control characters.
        let printable = raw.iter().all(|byte| (0x20..=0x7e).contains(byte));

        match core::str::from_utf8(&raw) {
            // `pad` honours width/precision flags, just like formatting a `str` does.
            Ok(text) if printable => f.pad(text),
            _ => write!(f, "0x{:08x}", self.0),
        }
    }
}

impl fmt::Debug for FourCC {
    /// Debug output is the `Display` form wrapped in single quotes, e.g. `'TEXT'`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("'{}'", self))
    }
}

/// Round `value` up to the nearest multiple of 128.
///
/// Values that are already a multiple of 128 are returned unchanged. Returns
/// `ParseError::Overflow` when the rounded value does not fit in a `u16`.
fn next_u16_multiple_of_128(value: u16) -> Result<u16, ParseError> {
    // 128 is a power of two, so the low seven bits are the remainder.
    match value & 127 {
        0 => Ok(value),
        excess => value.checked_add(128 - excess).ok_or(ParseError::Overflow),
    }
}

/// Round `value` up to the nearest multiple of 128.
///
/// Values that are already a multiple of 128 are returned unchanged. Returns
/// `ParseError::Overflow` when the rounded value does not fit in a `u32`.
fn next_u32_multiple_of_128(value: u32) -> Result<u32, ParseError> {
    // 128 is a power of two, so the low seven bits are the remainder.
    match value & 127 {
        0 => Ok(value),
        excess => value.checked_add(128 - excess).ok_or(ParseError::Overflow),
    }
}

/// Convert Mac OS timestamp to UNIX timestamp
///
/// The Mac OS epoch is 1 January 1904, UNIX epoch is 1 Jan 1970. The subtraction wraps, so
/// Mac OS timestamps that fall before the UNIX epoch come out as large values rather than
/// causing a panic.
fn mactime(timestamp: u32) -> u32 {
    const SECONDS_PER_DAY: u32 = 86_400;
    // 66 years of 365 days between 1904 and 1970, plus 17 leap days.
    const DAYS_BETWEEN_EPOCHS: u32 = 66 * 365 + 17;
    const EPOCH_DELTA: u32 = DAYS_BETWEEN_EPOCHS * SECONDS_PER_DAY;

    timestamp.wrapping_sub(EPOCH_DELTA)
}

/// Compute the CRC-16/XMODEM checksum of `data`.
///
/// This is the checksum stored in the MacBinary II/III header, where it covers the first
/// 124 header bytes.
fn calc_crc(data: &[u8]) -> u16 {
    // `Crc::new` is a `const fn`, so the lookup table is built once at compile time instead
    // of being regenerated on every call (detection and parsing each compute a CRC).
    const XMODEM: Crc<u16> = Crc::<u16>::new(&CRC_16_XMODEM);
    XMODEM.checksum(data)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test::read_fixture;

    /// Build a [`FourCC`] from its four-byte spelling.
    fn fourcc(code: &[u8; 4]) -> FourCC {
        FourCC(u32::from_be_bytes(*code))
    }

    /// Assertions shared by the three "Text File" fixtures, which hold the same file encoded
    /// with different MacBinary versions.
    fn check_text_file(file: &MacBinary<'_>, version: Version) {
        assert_eq!(file.version(), version);
        assert_eq!(file.filename(), "Text File");
        assert_eq!(file.file_type(), fourcc(b"TEXT"));
        assert_eq!(file.file_creator(), fourcc(b"R*ch")); // BBEdit
        assert_eq!(file.data_fork(), b"This is a test file.\r");
        assert_eq!(file.resource_fork_raw().len(), 1454);
    }

    #[test]
    fn test_next_multiple() {
        // (input, expected) pairs valid for both integer widths.
        for &(input, expected) in &[(0u16, 0u16), (3, 128), (128, 128), (129, 256)] {
            assert_eq!(next_u16_multiple_of_128(input), Ok(expected));
            assert_eq!(
                next_u32_multiple_of_128(u32::from(input)),
                Ok(u32::from(expected))
            );
        }
    }

    #[test]
    fn test_next_multiple_overflow() {
        // Rounding up from just below the maximum cannot be represented.
        let narrow = next_u16_multiple_of_128(u16::MAX - 3);
        assert_eq!(narrow, Err(ParseError::Overflow));

        let wide = next_u32_multiple_of_128(u32::MAX - 3);
        assert_eq!(wide, Err(ParseError::Overflow));
    }

    #[test]
    fn test_macbinary_1() {
        let bytes = read_fixture("tests/Text File I.Bin");
        let parsed = parse(&bytes).unwrap();

        check_text_file(&parsed, Version::I);
    }

    #[test]
    fn test_macbinary_2() {
        let bytes = read_fixture("tests/Text File II.bin");
        let parsed = parse(&bytes).unwrap();

        check_text_file(&parsed, Version::II);
    }

    #[test]
    fn test_macbinary_3() {
        let bytes = read_fixture("tests/Text File.bin");
        let parsed = parse(&bytes).unwrap();

        check_text_file(&parsed, Version::III);
    }

    #[test]
    fn test_no_resource_fork() {
        let bytes = read_fixture("tests/No resource fork.txt.bin");
        let parsed = parse(&bytes).unwrap();

        assert_eq!(parsed.version(), Version::III);
        // An empty resource fork is reported as `None`, not as a parse error.
        assert!(parsed.resource_fork().unwrap().is_none());
    }

    #[test]
    fn test_dates() {
        let bytes = read_fixture("tests/Date Test.bin");
        let parsed = parse(&bytes).unwrap();

        assert_eq!(parsed.version(), Version::III);
        assert_eq!(parsed.filename(), "Date Test");
        assert_eq!(parsed.file_type(), fourcc(b"TEXT"));
        assert_eq!(parsed.file_creator(), fourcc(b"MPS ")); // MPW Shell
        assert_eq!(parsed.data_fork(), b"Sunday, 26 March 2023 10:00:52 AM\r");
        // Both timestamps are exposed as UNIX time.
        assert_eq!(parsed.created(), 1679824852);
        assert_eq!(parsed.modified(), 1679824852);
    }
}