Skip to main content

binhex_rs/
lib.rs

1#![doc = include_str!("../README.md")]
2mod fork_reader;
3mod lookup_table;
4mod seek_over;
5mod six_bit_decoder;
6mod six_bit_rle_reader;
7mod util;
8
9use std::fs;
10use std::io;
11use std::io::{Read as _, Seek as _};
12use std::path;
13
14use binrw::{binread, BinReaderExt};
15use crc::CRC_16_XMODEM;
16use fourcc_rs::FourCC;
17use macintosh_utils::FinderFlags;
18
19pub use fork_reader::ForkReader;
20use seek_over::SeekOver;
21use six_bit_rle_reader::SixBitRleReader;
22use util::ReadByte;
23
24use crate::six_bit_decoder::SixBitDecoder;
25
26/// General error used by the crate
27#[derive(Debug, thiserror::Error)]
28pub enum Error {
29    /// The BinHex 4.0 header could not be located
30    #[error("The BinHex 4.0 header could not be located")]
31    HeaderNotFound,
32
33    /// A read or seek operation on the underlying reader failed
34    #[error(transparent)]
35    Io(#[from] io::Error),
36
37    /// Some data could not be decoded from the underlying reader
38    #[error(transparent)]
39    BinRw(#[from] binrw::Error),
40
41    /// The binhex encoded stream contains unknown characters
42    #[error("An unexpected character appeared within encoded data")]
43    InvalidCharacter,
44
45    /// The underlying stream did not provide enough data for decompression
46    #[error("Unexpected EOF")]
47    UnexpectedEof,
48
49    /// The seek operation is not supported
50    #[error("Seeking backwards or from the end is not supported at the moment")]
51    UnsupportedSeek,
52}
53
54impl From<Error> for io::Error {
55    fn from(val: Error) -> Self {
56        match val {
57            Error::Io(io) => io,
58            other => io::Error::other(Box::new(other)),
59        }
60    }
61}
62
/// Number of bytes processed per chunk during checksum verification (1 MiB)
const VERIFICATION_CHUNK_SIZE: usize = 1024 * 1024;
64
/// Identifies the location where checksum verification failed
///
/// The enum carries no data, so it is cheap to copy and can be compared and hashed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Checksum {
    /// The archive header is broken
    Header,
    /// The data fork is invalid
    DataFork,
    /// The resource fork is invalid
    ResourceFork,
}
75
76#[derive(Debug, thiserror::Error)]
77/// Error used to signal invalid checksums
78pub enum VerificationError {
79    #[error("A checksum did not match")]
80    /// A crc check has failed
81    ChecksumMismatch(Checksum),
82
83    #[error(transparent)]
84    /// The underlying stream returned an io::Error during seek or read
85    Io(#[from] io::Error),
86
87    #[error(transparent)]
88    /// BinRw was unable to decode some data
89    BinRw(#[from] binrw::Error),
90}
91
92#[binread]
93#[derive(Debug, Clone)]
94#[br(big)]
95/// On-disk structure describing contents of the BinHex archive
96pub struct ArchiveHeader {
97    /// Original name of the embedded file
98    #[br(map(macintosh_utils::string))]
99    pub name: String,
100    #[br(temp)]
101    _name_terminator: u8,
102    /// Four-byte type code for the embedded file
103    pub file_code: FourCC,
104    /// Four-byte identifier for the program that created the file
105    pub creator_code: FourCC,
106    /// Finder flags as observed at the time of compression
107    pub finder_flags: FinderFlags,
108    /// Number of bytes in the uncompressed data fork
109    pub data_len: u32,
110    /// Number of bytes in the uncompressed resource fork
111    pub resource_len: u32,
112    /// CRC-16 checksum of the header bytes
113    ///
114    /// The CRC-16 configuration is `CRC-16-XMODEM` the CRC of ASCII "123456789" is 0x31C3.
115    pub header_checksum: u16,
116}
117
118impl ArchiveHeader {
119    /// Fixed size of the header on disk
120    pub const FIXED_SIZE: usize = 22;
121}
122
123/// Reader for BinHex archives
124pub struct Archive<R> {
125    header: ArchiveHeader,
126    reader: SixBitRleReader<R>,
127}
128
129impl<R> Archive<R> {
130    /// Original name of the embedded file
131    pub fn name(&self) -> &str {
132        self.header.name.as_str()
133    }
134
135    /// The file's four-byte type identifier
136    pub fn file_code(&self) -> FourCC {
137        self.header.file_code
138    }
139
140    /// The file's four-byte creator code
141    pub fn creator_code(&self) -> FourCC {
142        self.header.creator_code
143    }
144
145    /// Finder flags at the time of compression
146    pub fn finder_flags(&self) -> FinderFlags {
147        self.header.finder_flags
148    }
149
150    /// Number of bytes in the uncompressed data fork
151    pub fn data_len(&self) -> usize {
152        self.header.data_len as usize
153    }
154
155    /// Number of bytes in the uncompressed resource fork
156    pub fn resource_len(&self) -> usize {
157        self.header.resource_len as usize
158    }
159
160    /// CRC-16 checksum of the header bytes
161    ///
162    /// The CRC-16 configuration is `CRC-16-XMODEM` the CRC of ASCII "123456789" is 0x31C3.
163    pub fn header_checksum(&self) -> u16 {
164        self.header.header_checksum
165    }
166}
167
168impl<R: io::Read + io::Seek> Archive<R> {
169    /// Fixed header that introduces the start of encoded data
170    pub const HEADER: &[u8; 45] = b"(This file must be converted with BinHex 4.0)";
171
172    /// Try to read an archive from the reader.
173    ///
174    /// It searches for the start marker and reads the archive header after that. Returns an error
175    /// if either fails.
176    pub fn try_from(mut inner: R) -> Result<Self, Error> {
177        inner
178            .seek_over_string(Self::HEADER)
179            .map_err(|_| Error::HeaderNotFound)?;
180
181        let data_start = loop {
182            if inner.read_byte()? == b':' {
183                break inner.stream_position()?;
184            }
185        };
186
187        let mut inner = SixBitRleReader::new(inner, data_start);
188        let header: ArchiveHeader = inner.read_be()?;
189
190        Ok(Self {
191            reader: inner,
192            header,
193        })
194    }
195
196    /// Open the data fork for reading
197    ///
198    /// Since reading backwards through RLE encoded data is expensive, this should be called before
199    /// accessing the resource fork, if the application wants to read both
200    pub fn data_fork(&mut self) -> Result<ForkReader<&mut SixBitRleReader<R>>, Error> {
201        self.reset()?;
202
203        let position = self.reader.position;
204        let length = self.data_len() as u64;
205
206        Ok(ForkReader::new(&mut self.reader, position, length))
207    }
208
209    /// Open the resource fork for reading
210    ///
211    /// Since reading backwards through RLE encoded data is expensive, this should be called after
212    /// accessing the data fork, if the application wants to read both
213    pub fn resource_fork(&mut self) -> Result<ForkReader<&mut SixBitRleReader<R>>, Error> {
214        self.reset()?;
215
216        self.reader.seek(io::SeekFrom::Current(
217            self.header.data_len as i64 + /* skip crc as well */ 2,
218        ))?;
219
220        let position = self.reader.position;
221        let length = self.resource_len() as u64;
222
223        Ok(ForkReader::new(&mut self.reader, position, length))
224    }
225
226    /// Verify checksums of the whole archive, this resets the stream and reads the whole file to
227    /// calculate the checksums.
228    pub fn verify(&mut self) -> Result<(), VerificationError> {
229        self.reset()?;
230
231        let crc = crc::Crc::<u16>::new(&CRC_16_XMODEM);
232
233        // Verify header checksum
234        let mut buf = vec![0u8; ArchiveHeader::FIXED_SIZE];
235        self.reader.read_exact(&mut buf)?;
236
237        let name_length = buf[0] as usize;
238        buf.append(&mut vec![0u8; name_length]);
239        self.reader.read_exact(
240            &mut buf[(ArchiveHeader::FIXED_SIZE)..(ArchiveHeader::FIXED_SIZE + name_length)],
241        )?;
242
243        let mut digest = crc.digest();
244        digest.update(&buf);
245        if digest.finalize() != 0 {
246            return Err(VerificationError::ChecksumMismatch(Checksum::Header));
247        }
248
249        // Verify data fork checksum
250        let mut chunk = [0u8; VERIFICATION_CHUNK_SIZE];
251        let mut digest = crc.digest();
252        let data_len_offset = name_length + 12;
253        let data_len = u32::from_be_bytes([
254            buf[data_len_offset],
255            buf[data_len_offset + 1],
256            buf[data_len_offset + 2],
257            buf[data_len_offset + 3],
258        ]) as usize
259            + 2;
260
261        for _ in 0..(data_len / VERIFICATION_CHUNK_SIZE) {
262            self.reader.read_exact(&mut chunk)?;
263            digest.update(&chunk);
264        }
265
266        let rest = data_len % VERIFICATION_CHUNK_SIZE;
267        self.reader.read_exact(&mut chunk[0..rest])?;
268        digest.update(&chunk[0..rest]);
269        if digest.finalize() != 0 {
270            return Err(VerificationError::ChecksumMismatch(Checksum::DataFork));
271        }
272
273        // Verify resource fork checksum
274        let mut digest = crc.digest();
275        let resource_len_offset = name_length + 16;
276        let resource_len = u32::from_be_bytes([
277            buf[resource_len_offset],
278            buf[resource_len_offset + 1],
279            buf[resource_len_offset + 2],
280            buf[resource_len_offset + 3],
281        ]) as usize
282            + 2;
283        for _ in 0..(resource_len / VERIFICATION_CHUNK_SIZE) {
284            self.reader.read_exact(&mut chunk)?;
285            digest.update(&chunk);
286        }
287
288        let rest = resource_len % VERIFICATION_CHUNK_SIZE;
289        self.reader.read_exact(&mut chunk[0..rest])?;
290        digest.update(&chunk[0..rest]);
291        if digest.finalize() != 0 {
292            return Err(VerificationError::ChecksumMismatch(Checksum::ResourceFork));
293        }
294
295        Ok(())
296    }
297
298    /// Returns the underlying reader that was used initially to open the archive
299    /// Note that the read position has probably changed
300    pub fn into_inner(self) -> R {
301        self.reader.into_inner()
302    }
303
304    fn reset(&mut self) -> io::Result<()> {
305        self.reader.reset()
306    }
307}
308
309impl Archive<fs::File> {
310    /// Try to open the archive specified by `path`
311    pub fn open<P: AsRef<path::Path>>(path: P) -> Result<Self, Error> {
312        Self::try_from(fs::File::open(path)?)
313    }
314
315    /// Try to clone the archive which would allow you to read resource and data forks
316    /// simultaneously
317    pub fn try_clone(&self) -> io::Result<Self> {
318        Ok(Self {
319            header: self.header.clone(),
320            reader: self.reader.try_clone()?,
321        })
322    }
323}
324
325impl<R: Clone> Clone for Archive<R> {
326    /// Clone the archive, allowing you to access resource and data forks at the same time
327    fn clone(&self) -> Self {
328        Self {
329            reader: self.reader.clone(),
330            header: self.header.clone(),
331        }
332    }
333}
334
335pub fn probe<R: io::Read + io::Seek>(reader: R) -> bool {
336    Archive::try_from(reader).is_ok()
337}
338
#[cfg(test)]
mod test {
    use std::fs;
    use std::io;

    use fourcc_rs::fourcc;

    use super::Archive;
    use crate::{Checksum, VerificationError};

    /// Location of the sample archive, relative to the working directory of the test run
    const SAMPLE_PATH: &str = "./sample-file.hqx";

    /// Loads the sample archive into memory, sets bit 3 of the byte at `offset` and opens the
    /// modified bytes as an archive
    fn open_corrupted(offset: usize) -> Archive<io::Cursor<Vec<u8>>> {
        let mut bytes = fs::read(SAMPLE_PATH).unwrap();
        bytes[offset] |= 1 << 3;

        Archive::try_from(io::Cursor::new(bytes)).unwrap()
    }

    #[test]
    fn decode() {
        let archive = Archive::try_from(fs::File::open(SAMPLE_PATH).unwrap()).unwrap();

        assert_eq!(archive.name(), "binhex.test.sit");
        assert_eq!(archive.file_code(), fourcc!("SITD"));
        assert_eq!(archive.creator_code(), fourcc!("SIT!"));
        assert_eq!(archive.data_len(), 380);
        assert_eq!(archive.resource_len(), 0);
        assert_eq!(archive.header_checksum(), 0x6e3c);
    }

    #[test]
    fn successfully_verify() {
        let mut archive = Archive::open("sample-file.hqx").unwrap();
        assert!(archive.verify().is_ok());
    }

    #[test]
    fn failed_header_verification() {
        // Corrupt a bit in the archive header
        // this will change the decoded filename from "binhex.test.sit" to "biéhex.test.sit"
        let mut archive = open_corrupted(137 + 3);

        let outcome = archive.verify();
        assert!(matches!(
            outcome,
            Err(VerificationError::ChecksumMismatch(Checksum::Header))
        ));
    }

    #[test]
    fn failed_data_verification() {
        // Corrupt a bit in the archive data
        let mut archive = open_corrupted(240 + 3);

        let outcome = archive.verify();
        assert!(matches!(
            outcome,
            Err(VerificationError::ChecksumMismatch(Checksum::DataFork))
        ));
    }
}