Skip to main content

binhex/
lib.rs

1#![doc = include_str!("../README.md")]
2mod fork_reader;
3mod lookup_table;
4mod seek_over;
5mod six_bit_decoder;
6mod six_bit_rle_reader;
7mod util;
8
9use std::fs;
10use std::io;
11use std::io::{Read as _, Seek as _};
12use std::path;
13
14use binrw::{binread, BinReaderExt};
15use crc::CRC_16_XMODEM;
16use macintosh_utils::{FinderFlags, FourCC};
17
18pub use fork_reader::ForkReader;
19use seek_over::SeekOver;
20use six_bit_rle_reader::SixBitRleReader;
21use util::ReadByte;
22
23use crate::six_bit_decoder::SixBitDecoder;
24
25/// General error used by the crate
26#[derive(Debug, thiserror::Error)]
27pub enum Error {
28    /// The BinHex 4.0 header could not be located
29    #[error("The BinHex 4.0 header could not be located")]
30    HeaderNotFound,
31
32    /// A read or seek operation on the underlying reader failed
33    #[error(transparent)]
34    Io(#[from] io::Error),
35
36    /// Some data could not be decoded from the underlying reader
37    #[error(transparent)]
38    BinRw(#[from] binrw::Error),
39
40    /// The binhex encoded stream contains unknown characters
41    #[error("An unexpected character appeared within encoded data")]
42    InvalidCharacter,
43
44    /// The underlying stream did not provide enough data for decompression
45    #[error("Unexpected EOF")]
46    UnexpectedEof,
47
48    /// The seek operation is not supported
49    #[error("Seeking backwards or from the end is not supported at the moment")]
50    UnsupportedSeek,
51}
52
53impl From<Error> for io::Error {
54    fn from(val: Error) -> Self {
55        match val {
56            Error::Io(io) => io,
57            other => io::Error::other(Box::new(other)),
58        }
59    }
60}
61
/// Size in bytes of the buffer used to stream fork data during verification (1 MiB)
const VERIFICATION_CHUNK_SIZE: usize = 1024 * 1024;
63
/// Identifies the part of the archive whose checksum verification failed
///
/// The type is a plain field-less enum, so it is cheap to copy and can be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Checksum {
    /// The checksum of the archive header did not match
    Header,
    /// The checksum of the data fork did not match
    DataFork,
    /// The checksum of the resource fork did not match
    ResourceFork,
}
74
75#[derive(Debug, thiserror::Error)]
76/// Error used to signal invalid checksums
77pub enum VerificationError {
78    #[error("A checksum did not match")]
79    /// A crc check has failed
80    ChecksumMismatch(Checksum),
81
82    #[error(transparent)]
83    /// The underlying stream returned an io::Error during seek or read
84    Io(#[from] io::Error),
85
86    #[error(transparent)]
87    /// BinRw was unable to decode some data
88    BinRw(#[from] binrw::Error),
89}
90
91#[binread]
92#[derive(Debug, Clone)]
93#[br(big)]
94/// On-disk structure describing contents of the BinHex archive
95pub struct ArchiveHeader {
96    /// Original name of the embedded file
97    #[br(map(macintosh_utils::string))]
98    pub name: String,
99    #[br(temp)]
100    _name_terminator: u8,
101    /// Four-byte type code for the embedded file
102    pub file_code: FourCC,
103    /// Four-byte identifier for the program that created the file
104    pub creator_code: FourCC,
105    /// Finder flags as observed at the time of compression
106    pub finder_flags: FinderFlags,
107    /// Number of bytes in the uncompressed data fork
108    pub data_len: u32,
109    /// Number of bytes in the uncompressed resource fork
110    pub resource_len: u32,
111    /// CRC-16 checksum of the header bytes
112    ///
113    /// The CRC-16 configuration is `CRC-16-XMODEM` the CRC of ASCII "123456789" is 0x31C3.
114    pub header_checksum: u16,
115}
116
117impl ArchiveHeader {
118    /// Fixed size of the header on disk
119    pub const FIXED_SIZE: usize = 22;
120}
121
122/// Reader for BinHex archives
123pub struct Archive<R> {
124    header: ArchiveHeader,
125    reader: SixBitRleReader<R>,
126    data_fork_start: u64,
127}
128
129impl<R> Archive<R> {
130    /// Original name of the embedded file
131    pub fn name(&self) -> &str {
132        self.header.name.as_str()
133    }
134
135    /// The file's four-byte type identifier
136    pub fn file_code(&self) -> FourCC {
137        self.header.file_code
138    }
139
140    /// The file's four-byte creator code
141    pub fn creator_code(&self) -> FourCC {
142        self.header.creator_code
143    }
144
145    /// Finder flags at the time of compression
146    pub fn finder_flags(&self) -> FinderFlags {
147        self.header.finder_flags
148    }
149
150    /// Number of bytes in the uncompressed data fork
151    pub fn data_len(&self) -> usize {
152        self.header.data_len as usize
153    }
154
155    /// Number of bytes in the uncompressed resource fork
156    pub fn resource_len(&self) -> usize {
157        self.header.resource_len as usize
158    }
159
160    /// CRC-16 checksum of the header bytes
161    ///
162    /// The CRC-16 configuration is `CRC-16-XMODEM` the CRC of ASCII "123456789" is 0x31C3.
163    pub fn header_checksum(&self) -> u16 {
164        self.header.header_checksum
165    }
166}
167
168impl<R: io::Read + io::Seek> Archive<R> {
169    /// Fixed header that introduces the start of encoded data
170    pub const HEADER: &[u8; 45] = b"(This file must be converted with BinHex 4.0)";
171
172    /// Try to read an archive from the reader.
173    ///
174    /// It searches for the start marker and reads the archive header after that. Returns an error
175    /// if either fails.
176    pub fn try_from(mut inner: R) -> Result<Self, Error> {
177        inner
178            .seek_over_string(Self::HEADER)
179            .map_err(|_| Error::HeaderNotFound)?;
180
181        let data_start = loop {
182            if inner.read_byte()? == b':' {
183                break inner.stream_position()?;
184            }
185        };
186
187        let mut inner = SixBitRleReader::new(inner, data_start);
188        let header: ArchiveHeader = inner.read_be()?;
189        let data_fork_start = inner.stream_position()?;
190
191        Ok(Self {
192            reader: inner,
193            header,
194            data_fork_start,
195        })
196    }
197
198    /// Open the data fork for reading
199    ///
200    /// Since reading backwards through RLE encoded data is expensive, this should be called before
201    /// accessing the resource fork, if the application wants to read both
202    pub fn data_fork(&mut self) -> Result<ForkReader<&mut SixBitRleReader<R>>, Error> {
203        self.reset()?;
204        self.reader
205            .seek(io::SeekFrom::Start(self.data_fork_start))?;
206
207        let length = self.data_len() as u64;
208
209        Ok(ForkReader::new(
210            &mut self.reader,
211            self.data_fork_start,
212            length,
213        ))
214    }
215
216    /// Open the resource fork for reading
217    ///
218    /// Since reading backwards through RLE encoded data is expensive, this should be called after
219    /// accessing the data fork, if the application wants to read both
220    pub fn resource_fork(&mut self) -> Result<ForkReader<&mut SixBitRleReader<R>>, Error> {
221        self.reset()?;
222
223        self.reader.seek(io::SeekFrom::Current(
224            self.data_fork_start as i64 + self.header.data_len as i64 + /* skip crc as well */ 2,
225        ))?;
226
227        let position = self.reader.position;
228        let length = self.resource_len() as u64;
229
230        Ok(ForkReader::new(&mut self.reader, position, length))
231    }
232
233    /// Verify checksums of the whole archive, this resets the stream and reads the whole file to
234    /// calculate the checksums.
235    pub fn verify(&mut self) -> Result<(), VerificationError> {
236        self.reset()?;
237
238        let crc = crc::Crc::<u16>::new(&CRC_16_XMODEM);
239
240        // Verify header checksum
241        let mut buf = vec![0u8; ArchiveHeader::FIXED_SIZE];
242        self.reader.read_exact(&mut buf)?;
243
244        let name_length = buf[0] as usize;
245        buf.append(&mut vec![0u8; name_length]);
246        self.reader.read_exact(
247            &mut buf[(ArchiveHeader::FIXED_SIZE)..(ArchiveHeader::FIXED_SIZE + name_length)],
248        )?;
249
250        let mut digest = crc.digest();
251        digest.update(&buf);
252        if digest.finalize() != 0 {
253            return Err(VerificationError::ChecksumMismatch(Checksum::Header));
254        }
255
256        // Verify data fork checksum
257        let mut chunk = [0u8; VERIFICATION_CHUNK_SIZE];
258        let mut digest = crc.digest();
259        let data_len_offset = name_length + 12;
260        let data_len = u32::from_be_bytes([
261            buf[data_len_offset],
262            buf[data_len_offset + 1],
263            buf[data_len_offset + 2],
264            buf[data_len_offset + 3],
265        ]) as usize
266            + 2;
267
268        for _ in 0..(data_len / VERIFICATION_CHUNK_SIZE) {
269            self.reader.read_exact(&mut chunk)?;
270            digest.update(&chunk);
271        }
272
273        let rest = data_len % VERIFICATION_CHUNK_SIZE;
274        self.reader.read_exact(&mut chunk[0..rest])?;
275        digest.update(&chunk[0..rest]);
276        if digest.finalize() != 0 {
277            return Err(VerificationError::ChecksumMismatch(Checksum::DataFork));
278        }
279
280        // Verify resource fork checksum
281        let mut digest = crc.digest();
282        let resource_len_offset = name_length + 16;
283        let resource_len = u32::from_be_bytes([
284            buf[resource_len_offset],
285            buf[resource_len_offset + 1],
286            buf[resource_len_offset + 2],
287            buf[resource_len_offset + 3],
288        ]) as usize
289            + 2;
290        for _ in 0..(resource_len / VERIFICATION_CHUNK_SIZE) {
291            self.reader.read_exact(&mut chunk)?;
292            digest.update(&chunk);
293        }
294
295        let rest = resource_len % VERIFICATION_CHUNK_SIZE;
296        self.reader.read_exact(&mut chunk[0..rest])?;
297        digest.update(&chunk[0..rest]);
298        if digest.finalize() != 0 {
299            return Err(VerificationError::ChecksumMismatch(Checksum::ResourceFork));
300        }
301
302        Ok(())
303    }
304
305    /// Returns the underlying reader that was used initially to open the archive
306    /// Note that the read position has probably changed
307    pub fn into_inner(self) -> R {
308        self.reader.into_inner()
309    }
310
311    fn reset(&mut self) -> io::Result<()> {
312        self.reader.reset()
313    }
314}
315
316impl Archive<fs::File> {
317    /// Try to open the archive specified by `path`
318    pub fn open<P: AsRef<path::Path>>(path: P) -> Result<Self, Error> {
319        Self::try_from(fs::File::open(path)?)
320    }
321
322    /// Try to clone the archive which would allow you to read resource and data forks
323    /// simultaneously
324    pub fn try_clone(&self) -> io::Result<Self> {
325        Ok(Self {
326            header: self.header.clone(),
327            reader: self.reader.try_clone()?,
328            data_fork_start: self.data_fork_start,
329        })
330    }
331}
332
333impl<R: Clone> Clone for Archive<R> {
334    /// Clone the archive, allowing you to access resource and data forks at the same time
335    fn clone(&self) -> Self {
336        Self {
337            reader: self.reader.clone(),
338            header: self.header.clone(),
339            data_fork_start: self.data_fork_start,
340        }
341    }
342}
343
344/// Detect if a reader is binhex encoded or not
345pub fn probe<R: io::Read + io::Seek>(reader: R) -> bool {
346    Archive::try_from(reader).is_ok()
347}
348
#[cfg(test)]
mod test {
    use std::fs;
    use std::io;
    use std::io::Read as _;
    use std::io::Seek;

    use macintosh_utils::fourcc;

    use super::Archive;
    use crate::{Checksum, VerificationError};

    /// Reads `fork` to its end and returns everything it yielded
    fn drain(mut fork: impl io::Read) -> Vec<u8> {
        let mut bytes = Vec::new();
        fork.read_to_end(&mut bytes).unwrap();
        bytes
    }

    /// Loads the sample archive into memory, sets bit 3 of the byte at `offset` and opens the
    /// result as an archive
    fn sample_with_bit_set(offset: usize) -> Archive<io::Cursor<Vec<u8>>> {
        let mut bytes = fs::read("./sample-file.hqx").unwrap();
        bytes[offset] |= 1 << 3;
        Archive::try_from(io::Cursor::new(bytes)).unwrap()
    }

    #[test]
    fn decode() {
        let file = fs::File::open("./sample-file.hqx").unwrap();
        let archive = Archive::try_from(file).unwrap();

        assert_eq!(archive.name(), "binhex.test.sit");
        assert_eq!(archive.file_code(), fourcc!("SITD"));
        assert_eq!(archive.creator_code(), fourcc!("SIT!"));
        assert_eq!(archive.data_len(), 380);
        assert_eq!(archive.resource_len(), 0);
        assert_eq!(archive.header_checksum(), 0x6e3c);
    }

    #[test]
    fn successfully_verify() {
        let mut archive = Archive::open("sample-file.hqx").unwrap();
        assert!(archive.verify().is_ok());
    }

    #[test]
    fn failed_header_verification() {
        // Corrupt a bit in the archive header
        // this will change the decoded filename from "binhex.test.sit" to "biéhex.test.sit"
        let mut archive = sample_with_bit_set(137 + 3);

        let outcome = archive.verify();
        assert!(matches!(
            outcome,
            Err(VerificationError::ChecksumMismatch(Checksum::Header))
        ));
    }

    #[test]
    fn failed_data_verification() {
        // Corrupt a bit in the archive data
        let mut archive = sample_with_bit_set(240 + 3);

        let outcome = archive.verify();
        assert!(matches!(
            outcome,
            Err(VerificationError::ChecksumMismatch(Checksum::DataFork))
        ));
    }

    #[test]
    fn reading_data_fork_multiple_times() {
        let mut archive = Archive::open("sample.txt.hqx").unwrap();
        assert_eq!(archive.name(), "Sample.txt");

        let first_pass = drain(archive.data_fork().unwrap());
        let second_pass = drain(archive.data_fork().unwrap());

        assert_eq!(first_pass, second_pass);
    }

    #[test]
    fn reading_resource_fork_multiple_times() {
        let mut archive = Archive::open("sample.txt.hqx").unwrap();
        assert_eq!(archive.name(), "Sample.txt");

        let first_pass = drain(archive.resource_fork().unwrap());
        let second_pass = drain(archive.resource_fork().unwrap());

        assert_eq!(first_pass, second_pass);
    }

    #[test]
    fn seeking_in_data_fork() {
        let mut archive = Archive::open("sample.txt.hqx").unwrap();
        // The data fork contains "Hello, World!"
        let mut fork = archive.data_fork().unwrap();

        let mut hello = vec![0u8; 5];
        fork.read_exact(&mut hello).unwrap();

        // Skip over ", "
        fork.seek(io::SeekFrom::Current(2)).unwrap();

        let mut world = vec![0u8; 5];
        fork.read_exact(&mut world).unwrap();

        assert_eq!(hello, b"Hello");
        assert_eq!(world, b"World");

        fork.seek(io::SeekFrom::Start(5 + 2 + 5)).unwrap();
        let tail = drain(&mut fork);
        assert_eq!(tail, b"!");
    }

    #[test]
    fn seeking_in_resource_fork() {
        let mut archive = Archive::open("sample.txt.hqx").unwrap();
        let mut fork = archive.resource_fork().unwrap();

        let mut word = vec![0u8; 4];
        fork.read_exact(&mut word).unwrap();
        assert_eq!(word, b"\x00\x00\x01\x00");

        fork.seek(io::SeekFrom::Start(4)).unwrap();
        fork.read_exact(&mut word).unwrap();
        assert_eq!(word, b"\x00\x00\x01\x1a");

        fork.seek(io::SeekFrom::Start(0x138)).unwrap();
        fork.read_exact(&mut word).unwrap();
        assert_eq!(word, b"styl");
    }
}