binhex-rs 0.1.0

Crate to read BinHex 4 encoded files
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
#![doc = include_str!("../README.md")]
mod fork_reader;
mod lookup_table;
mod seek_over;
mod six_bit_decoder;
mod six_bit_rle_reader;
mod util;

use std::fs;
use std::io;
use std::io::{Read as _, Seek as _};
use std::path;

use binrw::{binread, BinReaderExt};
use crc::CRC_16_XMODEM;
use fourcc_rs::FourCC;
use macintosh_utils::FinderFlags;

pub use fork_reader::ForkReader;
use seek_over::SeekOver;
use six_bit_rle_reader::SixBitRleReader;
use util::ReadByte;

use crate::six_bit_decoder::SixBitDecoder;

/// General error used by the crate
///
/// Returned by the fallible constructors and fork accessors of [`Archive`]. A value of this type
/// can be turned into an [`io::Error`] through the `From<Error> for io::Error` implementation,
/// which lets it be surfaced from code that must return `io::Result`.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// The BinHex 4.0 header could not be located
    ///
    /// Produced by [`Archive::try_from`] when skipping over the start marker fails.
    #[error("The BinHex 4.0 header could not be located")]
    HeaderNotFound,

    /// A read or seek operation on the underlying reader failed
    ///
    /// Thanks to `#[from]`, an [`io::Error`] is converted into this variant by the `?` operator.
    #[error(transparent)]
    Io(#[from] io::Error),

    /// Some data could not be decoded from the underlying reader
    ///
    /// Thanks to `#[from]`, a [`binrw::Error`] is converted into this variant by the `?` operator.
    #[error(transparent)]
    BinRw(#[from] binrw::Error),

    /// The binhex encoded stream contains unknown characters
    #[error("An unexpected character appeared within encoded data")]
    InvalidCharacter,

    /// The underlying stream did not provide enough data for decompression
    #[error("Unexpected EOF")]
    UnexpectedEof,

    /// The seek operation is not supported
    ///
    /// As the message states, seeking backwards or relative to the end is not available.
    #[error("Seeking backwards or from the end is not supported at the moment")]
    UnsupportedSeek,
}

impl From<Error> for io::Error {
    fn from(val: Error) -> Self {
        match val {
            Error::Io(io) => io,
            other => io::Error::other(Box::new(other)),
        }
    }
}

/// Size of the scratch buffer that [`Archive::verify`] reads fork data into while checksumming
const VERIFICATION_CHUNK_SIZE: usize = 1 << 20 /* bytes => 1 MiB */;

#[derive(Debug)]
/// Identifies the location where checksum verification failed
///
/// Carried by [`VerificationError::ChecksumMismatch`]. [`Archive::verify`] checks the sections in
/// the order the variants are listed and reports the first one that fails.
pub enum Checksum {
    /// The archive header is broken
    Header,
    /// The data fork is invalid
    DataFork,
    /// The resource fork is invalid
    ResourceFork,
}

#[derive(Debug, thiserror::Error)]
/// Error used to signal invalid checksums
///
/// This is the error type of [`Archive::verify`]. Besides an actual mismatch it also covers
/// failures of the underlying stream that prevented the check from completing.
pub enum VerificationError {
    #[error("A checksum did not match")]
    /// A crc check has failed; the payload names the section whose checksum did not match
    ChecksumMismatch(Checksum),

    #[error(transparent)]
    /// The underlying stream returned an io::Error during seek or read
    Io(#[from] io::Error),

    #[error(transparent)]
    /// BinRw was unable to decode some data
    BinRw(#[from] binrw::Error),
}

#[binread]
#[derive(Debug, Clone)]
#[br(big)]
/// On-disk structure describing contents of the BinHex archive
///
/// Decoded big-endian (`#[br(big)]`) from the start of the decoded data stream by
/// [`Archive::try_from`]. Fields are listed in the order they are read.
pub struct ArchiveHeader {
    /// Original name of the embedded file
    ///
    /// The raw value is converted to a `String` by `macintosh_utils::string`.
    #[br(map(macintosh_utils::string))]
    pub name: String,
    // One byte that follows the name on disk. It is read to keep the following fields aligned
    // but, being marked `temp`, is not kept in the struct.
    #[br(temp)]
    _name_terminator: u8,
    /// Four-byte type code for the embedded file
    pub file_code: FourCC,
    /// Four-byte identifier for the program that created the file
    pub creator_code: FourCC,
    /// Finder flags as observed at the time of compression
    pub finder_flags: FinderFlags,
    /// Number of bytes in the uncompressed data fork
    pub data_len: u32,
    /// Number of bytes in the uncompressed resource fork
    pub resource_len: u32,
    /// CRC-16 checksum of the header bytes
    ///
    /// The CRC-16 configuration is `CRC-16-XMODEM`; with it, the CRC of ASCII "123456789" is
    /// 0x31C3.
    pub header_checksum: u16,
}

impl ArchiveHeader {
    /// Fixed size of the header on disk
    ///
    /// This is the number of header bytes that are present regardless of the file name, i.e. the
    /// size of the header without the variable-length name. [`Archive::verify`] reads this many
    /// bytes plus the name length to obtain the complete header including its checksum.
    pub const FIXED_SIZE: usize = 22;
}

/// Reader for BinHex archives
///
/// Created with [`Archive::try_from`] (any `Read + Seek` source) or [`Archive::open`] (a file
/// path). Gives access to the header metadata, both forks and checksum verification.
pub struct Archive<R> {
    // Header decoded once when the archive is opened
    header: ArchiveHeader,
    // Decoding reader wrapped around the caller's reader; header and forks are read through it
    reader: SixBitRleReader<R>,
}

/// Read-only accessors for the metadata stored in the archive header
impl<R> Archive<R> {
    /// Name the embedded file originally had
    pub fn name(&self) -> &str {
        &self.header.name
    }

    /// Four-byte type identifier of the embedded file
    pub fn file_code(&self) -> FourCC {
        let header = &self.header;
        header.file_code
    }

    /// Four-byte code of the program that created the file
    pub fn creator_code(&self) -> FourCC {
        let header = &self.header;
        header.creator_code
    }

    /// Finder flags recorded when the file was compressed
    pub fn finder_flags(&self) -> FinderFlags {
        let header = &self.header;
        header.finder_flags
    }

    /// Size of the uncompressed data fork in bytes
    pub fn data_len(&self) -> usize {
        let bytes: u32 = self.header.data_len;
        bytes as usize
    }

    /// Size of the uncompressed resource fork in bytes
    pub fn resource_len(&self) -> usize {
        let bytes: u32 = self.header.resource_len;
        bytes as usize
    }

    /// CRC-16 checksum stored for the header bytes
    ///
    /// The CRC-16 configuration is `CRC-16-XMODEM`; with it, the CRC of ASCII "123456789" is
    /// 0x31C3.
    pub fn header_checksum(&self) -> u16 {
        let header = &self.header;
        header.header_checksum
    }
}

impl<R: io::Read + io::Seek> Archive<R> {
    /// Fixed header that introduces the start of encoded data
    pub const HEADER: &[u8; 45] = b"(This file must be converted with BinHex 4.0)";

    /// Try to read an archive from the reader.
    ///
    /// It searches for the start marker and reads the archive header after that. Returns an error
    /// if either fails.
    pub fn try_from(mut inner: R) -> Result<Self, Error> {
        inner
            .seek_over_string(Self::HEADER)
            .map_err(|_| Error::HeaderNotFound)?;

        let data_start = loop {
            if inner.read_byte()? == b':' {
                break inner.stream_position()?;
            }
        };

        let mut inner = SixBitRleReader::new(inner, data_start);
        let header: ArchiveHeader = inner.read_be()?;

        Ok(Self {
            reader: inner,
            header,
        })
    }

    /// Open the data fork for reading
    ///
    /// Since reading backwards through RLE encoded data is expensive, this should be called before
    /// accessing the resource fork, if the application wants to read both
    pub fn data_fork(&mut self) -> Result<ForkReader<&mut SixBitRleReader<R>>, Error> {
        self.reset()?;

        let position = self.reader.position;
        let length = self.data_len() as u64;

        Ok(ForkReader::new(&mut self.reader, position, length))
    }

    /// Open the resource fork for reading
    ///
    /// Since reading backwards through RLE encoded data is expensive, this should be called after
    /// accessing the data fork, if the application wants to read both
    pub fn resource_fork(&mut self) -> Result<ForkReader<&mut SixBitRleReader<R>>, Error> {
        self.reset()?;

        self.reader.seek(io::SeekFrom::Current(
            self.header.data_len as i64 + /* skip crc as well */ 2,
        ))?;

        let position = self.reader.position;
        let length = self.resource_len() as u64;

        Ok(ForkReader::new(&mut self.reader, position, length))
    }

    /// Verify checksums of the whole archive, this resets the stream and reads the whole file to
    /// calculate the checksums.
    pub fn verify(&mut self) -> Result<(), VerificationError> {
        self.reset()?;

        let crc = crc::Crc::<u16>::new(&CRC_16_XMODEM);

        // Verify header checksum
        let mut buf = vec![0u8; ArchiveHeader::FIXED_SIZE];
        self.reader.read_exact(&mut buf)?;

        let name_length = buf[0] as usize;
        buf.append(&mut vec![0u8; name_length]);
        self.reader.read_exact(
            &mut buf[(ArchiveHeader::FIXED_SIZE)..(ArchiveHeader::FIXED_SIZE + name_length)],
        )?;

        let mut digest = crc.digest();
        digest.update(&buf);
        if digest.finalize() != 0 {
            return Err(VerificationError::ChecksumMismatch(Checksum::Header));
        }

        // Verify data fork checksum
        let mut chunk = [0u8; VERIFICATION_CHUNK_SIZE];
        let mut digest = crc.digest();
        let data_len_offset = name_length + 12;
        let data_len = u32::from_be_bytes([
            buf[data_len_offset],
            buf[data_len_offset + 1],
            buf[data_len_offset + 2],
            buf[data_len_offset + 3],
        ]) as usize
            + 2;

        for _ in 0..(data_len / VERIFICATION_CHUNK_SIZE) {
            self.reader.read_exact(&mut chunk)?;
            digest.update(&chunk);
        }

        let rest = data_len % VERIFICATION_CHUNK_SIZE;
        self.reader.read_exact(&mut chunk[0..rest])?;
        digest.update(&chunk[0..rest]);
        if digest.finalize() != 0 {
            return Err(VerificationError::ChecksumMismatch(Checksum::DataFork));
        }

        // Verify resource fork checksum
        let mut digest = crc.digest();
        let resource_len_offset = name_length + 16;
        let resource_len = u32::from_be_bytes([
            buf[resource_len_offset],
            buf[resource_len_offset + 1],
            buf[resource_len_offset + 2],
            buf[resource_len_offset + 3],
        ]) as usize
            + 2;
        for _ in 0..(resource_len / VERIFICATION_CHUNK_SIZE) {
            self.reader.read_exact(&mut chunk)?;
            digest.update(&chunk);
        }

        let rest = resource_len % VERIFICATION_CHUNK_SIZE;
        self.reader.read_exact(&mut chunk[0..rest])?;
        digest.update(&chunk[0..rest]);
        if digest.finalize() != 0 {
            return Err(VerificationError::ChecksumMismatch(Checksum::ResourceFork));
        }

        Ok(())
    }

    /// Returns the underlying reader that was used initially to open the archive
    /// Note that the read position has probably changed
    pub fn into_inner(self) -> R {
        self.reader.into_inner()
    }

    fn reset(&mut self) -> io::Result<()> {
        self.reader.reset()
    }
}

/// Conveniences that are only available for archives backed by a file
impl Archive<fs::File> {
    /// Open the file at `path` and try to read it as an archive
    ///
    /// Fails if the file cannot be opened or if [`Archive::try_from`] rejects its contents.
    pub fn open<P: AsRef<path::Path>>(path: P) -> Result<Self, Error> {
        let file = fs::File::open(path)?;
        Self::try_from(file)
    }

    /// Try to duplicate the archive so that the data and the resource fork can be read
    /// simultaneously, one from each copy
    ///
    /// Fails if the underlying reader cannot be cloned.
    pub fn try_clone(&self) -> io::Result<Self> {
        let header = self.header.clone();
        let reader = self.reader.try_clone()?;

        Ok(Self { header, reader })
    }
}

impl<R: Clone> Clone for Archive<R> {
    /// Duplicate the archive; with two copies the resource and data forks can be accessed at the
    /// same time
    fn clone(&self) -> Self {
        let reader = self.reader.clone();
        let header = self.header.clone();

        Self { reader, header }
    }
}

/// Check whether `reader` can be opened as a BinHex archive
///
/// Returns `true` exactly when [`Archive::try_from`] succeeds on `reader`. The reader is consumed
/// and the reason for a failure is discarded.
pub fn probe<R: io::Read + io::Seek>(reader: R) -> bool {
    let opened = Archive::try_from(reader);
    opened.is_ok()
}

#[cfg(test)]
mod test {
    use std::fs;
    use std::io;

    use fourcc_rs::fourcc;

    use super::Archive;
    use crate::{Checksum, VerificationError};

    /// Loads the sample archive into memory, sets bit 3 of the byte at `offset` and opens the
    /// modified bytes as an archive.
    fn archive_with_bit_set(offset: usize) -> Archive<io::Cursor<Vec<u8>>> {
        let mut bytes = fs::read("./sample-file.hqx").unwrap();
        bytes[offset] |= 1 << 3;

        Archive::try_from(io::Cursor::new(bytes)).unwrap()
    }

    /// The header fields of the sample archive are decoded as expected
    #[test]
    fn decode() {
        let sample = fs::File::open("./sample-file.hqx").unwrap();
        let archive = Archive::try_from(sample).unwrap();

        assert_eq!(archive.name(), "binhex.test.sit");
        assert_eq!(archive.file_code(), fourcc!("SITD"));
        assert_eq!(archive.creator_code(), fourcc!("SIT!"));
        assert_eq!(archive.data_len(), 380);
        assert_eq!(archive.resource_len(), 0);
        assert_eq!(archive.header_checksum(), 0x6e3c);
    }

    /// An untouched sample archive passes verification
    #[test]
    fn successfully_verify() {
        let mut archive = Archive::open("sample-file.hqx").unwrap();
        assert!(archive.verify().is_ok());
    }

    /// A flipped bit inside the encoded header is reported as a header mismatch
    #[test]
    fn failed_header_verification() {
        // Corrupt a bit in the archive header
        // this will change the decoded filename from "binhex.test.sit" to "biéhex.test.sit"
        let mut archive = archive_with_bit_set(137 + 3);

        let outcome = archive.verify();
        assert!(matches!(
            outcome,
            Err(VerificationError::ChecksumMismatch(Checksum::Header))
        ));
    }

    /// A flipped bit inside the encoded data fork is reported as a data fork mismatch
    #[test]
    fn failed_data_verification() {
        // corrupt a bit in the archive data
        let mut archive = archive_with_bit_set(240 + 3);

        let outcome = archive.verify();
        assert!(matches!(
            outcome,
            Err(VerificationError::ChecksumMismatch(Checksum::DataFork))
        ));
    }
}