1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
//! Integration tests for the StuffIt 5 Arsenic (method 15) decoder.
//!
//! A minimal StuffIt 5 container walker (per FORMAT-SPEC §1, adapted to the
//! real on-disk layout of the staged fixtures) locates every method-15 fork
//! and feeds its compressed range to the Arsenic decoder. Each decoded fork
//! is validated by (a) the in-stream CRC-32 trailer (verified inside the
//! decoder — a mismatch returns `Corrupt`), and (b) the decoded length
//! matching the container's uncompressed-size field.
#![cfg(feature = "arsenic")]
use compcol::arsenic::Arsenic;
use compcol::{Algorithm, Decoder, Error, Status};
/// Raw bytes of the smallest staged fixture (`Galax.SIT`), bundled into the
/// repo and embedded into the test binary at compile time by `include_bytes!`.
const GALAX_SIT: &[u8] = include_bytes!("fixtures/arsenic/Galax.SIT");
/// A method-15 (Arsenic) fork located by the container walker.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Fork {
    /// Byte range, within the archive, of the compressed payload (the raw
    /// method-15 fork bytes).
    data: std::ops::Range<usize>,
    /// Declared uncompressed size.
    uncompressed: usize,
    /// True for the data fork, false for the resource fork.
    is_data: bool,
}

/// Walk a StuffIt 5 archive and return every method-15 (Arsenic) fork.
///
/// Layout discovered from the real fixtures:
/// - Archive begins with `"StuffIt"` + one terminator byte (`'!'`/`' '`/`'?'`).
/// - Entries are tagged with magic `0xA5A5A5A5`. The common header is 48
///   bytes: `+6` u16 header size (= 48 + name length), `+9` flags (bit
///   `0x40` = directory), `+31` name length, name at `+48`.
/// - The **data fork** descriptor lives in the common header: uncompressed
///   size at `+34`, compressed size at `+38`, method byte at `+46`.
/// - The **resource fork** descriptor is the 50-byte extended record at
///   `forkbase = entrystart + header_size`: uncompressed size at
///   `forkbase+36`, compressed size at `forkbase+40`, method byte at
///   `forkbase+48`.
/// - Resource-fork data begins at `forkbase + 50` (length = rsrc compressed),
///   immediately followed by the data-fork data (length = data compressed).
///
/// The walker scans for `0xA5A5A5A5` magics (robust to the exact
/// sibling/child threading) and parses each non-directory entry. For each
/// entry the resource fork (if any) is reported before the data fork. Forks
/// whose declared compressed range would overflow `usize` or run past the end
/// of `d` are skipped rather than reported.
///
/// # Panics
///
/// Panics if `d` is shorter than 8 bytes or does not start with the
/// `"StuffIt"` signature followed by one of the accepted terminator bytes.
fn walk_stuffit5(d: &[u8]) -> Vec<Fork> {
    /// Tag that introduces every entry header.
    const ENTRY_MAGIC: [u8; 4] = [0xA5; 4];
    /// Size of the fixed part of an entry header (the name follows it).
    const COMMON_HEADER_LEN: usize = 48;
    /// Size of the extended record carrying the resource-fork descriptor.
    const FORK_RECORD_LEN: usize = 50;
    /// Compression-method byte identifying Arsenic.
    const METHOD_ARSENIC: u8 = 15;

    assert!(d.len() >= 8, "archive too small");
    assert_eq!(&d[..7], b"StuffIt", "bad archive signature");
    assert!(
        matches!(d[7], b'!' | b' ' | b'?'),
        "bad archive signature terminator: {:#x}",
        d[7]
    );

    // Big-endian u32 at offset `o`, widened to usize. Callers bounds-check.
    let be32 = |o: usize| u32::from_be_bytes([d[o], d[o + 1], d[o + 2], d[o + 3]]) as usize;
    // `start..start + len` if that range neither overflows nor leaves `d`.
    // The sizes come straight from the archive, so the addition is checked
    // (plain `+` could overflow on 32-bit targets).
    let span = |start: usize, len: usize| {
        start
            .checked_add(len)
            .filter(|&end| end <= d.len())
            .map(|end| start..end)
    };

    let mut forks = Vec::new();
    let mut i = 0usize;
    while i + COMMON_HEADER_LEN <= d.len() {
        if d[i..i + 4] != ENTRY_MAGIC {
            i += 1;
            continue;
        }
        let entry = i;
        // Resume scanning just past this magic whether or not the entry parses.
        // NOTE(review): a magic that overlaps the one just matched (a run of
        // five or more 0xA5 bytes) is therefore not examined.
        i += 4;

        let header_size = u16::from_be_bytes([d[entry + 6], d[entry + 7]]) as usize;
        let is_dir = d[entry + 9] & 0x40 != 0;
        if is_dir || header_size < COMMON_HEADER_LEN || entry + header_size > d.len() {
            continue;
        }
        let forkbase = entry + header_size;

        // Data-fork descriptor (common header).
        let data_unc = be32(entry + 34);
        let data_comp = be32(entry + 38);
        let data_method = d[entry + 46];

        // Resource-fork descriptor (extended record); treated as absent when
        // the record does not fit inside the archive.
        let (rsrc_unc, rsrc_comp, rsrc_method) = if forkbase + FORK_RECORD_LEN <= d.len() {
            (be32(forkbase + 36), be32(forkbase + 40), d[forkbase + 48])
        } else {
            (0, 0, 0)
        };

        // Payload layout: resource-fork bytes first, data-fork bytes after.
        let rsrc_off = forkbase + FORK_RECORD_LEN;
        let data_off = rsrc_off.checked_add(rsrc_comp);

        if rsrc_method == METHOD_ARSENIC && rsrc_comp > 0 {
            if let Some(data) = span(rsrc_off, rsrc_comp) {
                forks.push(Fork {
                    data,
                    uncompressed: rsrc_unc,
                    is_data: false,
                });
            }
        }
        if data_method == METHOD_ARSENIC && data_comp > 0 {
            if let Some(data) = data_off.and_then(|off| span(off, data_comp)) {
                forks.push(Fork {
                    data,
                    uncompressed: data_unc,
                    is_data: true,
                });
            }
        }
    }
    forks
}
/// Run `input` through a fresh Arsenic decoder and collect everything it
/// emits.
///
/// Input is offered in slices of at most `in_chunk` bytes and output is
/// collected through a scratch buffer of `out_chunk` bytes; a value of zero
/// for either is treated as one. Returns as soon as the decoder reports
/// `Status::StreamEnd`; otherwise, once all input has been offered, the
/// remainder is pulled out through `finish()`.
///
/// # Errors
///
/// Propagates any error returned by the decoder's `decode` or `finish`.
fn decode_chunked(input: &[u8], in_chunk: usize, out_chunk: usize) -> Result<Vec<u8>, Error> {
    let mut decoder = Arsenic::decoder();
    let mut decoded = Vec::new();
    let mut scratch = vec![0u8; out_chunk.max(1)];
    let in_step = in_chunk.max(1);

    let mut offset = 0usize;
    loop {
        // Next input slice; empty when `input` itself is empty.
        let stop = (offset + in_step).min(input.len());
        let mut pending = &input[offset..stop];

        // Keep calling decode on this slice until the decoder asks for more
        // input, emptying the scratch buffer after every call.
        loop {
            let (progress, status) = decoder.decode(pending, &mut scratch)?;
            decoded.extend_from_slice(&scratch[..progress.written]);
            pending = &pending[progress.consumed..];
            match status {
                Status::StreamEnd => return Ok(decoded),
                Status::InputEmpty => break,
                Status::OutputFull => {}
            }
        }

        offset = stop;
        if offset >= input.len() {
            break;
        }
    }

    // All input offered without seeing StreamEnd: pull the tail via finish()
    // until it reports the end or stops producing bytes.
    loop {
        let (progress, status) = decoder.finish(&mut scratch)?;
        decoded.extend_from_slice(&scratch[..progress.written]);
        if matches!(status, Status::StreamEnd) || progress.written == 0 {
            break;
        }
    }
    Ok(decoded)
}
#[test]
fn galax_fixture_decodes_and_verifies() {
    let located = walk_stuffit5(GALAX_SIT);
    assert_eq!(located.len(), 1, "expected exactly one method-15 fork");

    // The single Arsenic payload in Galax sits in the resource fork.
    let only = &located[0];
    assert!(!only.is_data, "Galax's only fork is a resource fork");

    // Hand the decoder the whole payload as a single input chunk; a failed
    // in-stream CRC would surface here as a decode error.
    let payload = &GALAX_SIT[only.data.start..only.data.end];
    let decoded = decode_chunked(payload, payload.len(), 1 << 16)
        .expect("Galax fork should decode and pass its in-stream CRC");

    assert_eq!(
        decoded.len(),
        only.uncompressed,
        "decoded length must equal the container's uncompressed size"
    );
}
#[test]
fn galax_fixture_decodes_under_byte_chunking() {
    let located = walk_stuffit5(GALAX_SIT);
    let first = &located[0];
    let payload = &GALAX_SIT[first.data.start..first.data.end];

    // Worst-case chunking for the resumable state machine: one input byte
    // offered and one output byte accepted per call.
    let trickled =
        decode_chunked(payload, 1, 1).expect("byte-chunked decode should match one-shot");
    assert_eq!(trickled.len(), first.uncompressed);

    // Reference result: the same payload decoded in a single input chunk.
    let reference = decode_chunked(payload, payload.len(), 1 << 16).unwrap();
    assert_eq!(trickled, reference, "chunking must not change the output");
}
#[test]
fn empty_input_needs_more_then_errors_on_finish() {
    let mut decoder = Arsenic::decoder();
    let mut scratch = [0u8; 16];

    // With nothing to read the decoder must ask for more input and emit
    // nothing.
    let (progress, status) = decoder.decode(&[], &mut scratch).unwrap();
    assert_eq!(progress.written, 0);
    assert_eq!(status, Status::InputEmpty);

    // Finishing a stream that never terminated is an UnexpectedEnd error.
    let failure = decoder.finish(&mut scratch).unwrap_err();
    assert_eq!(failure, Error::UnexpectedEnd);
}
#[test]
fn truncated_stream_is_clean_error() {
    // Only the first 16 bytes of a real fork are supplied, so the in-band
    // terminator is never reached; finish must answer with UnexpectedEnd
    // instead of looping or panicking.
    let located = walk_stuffit5(GALAX_SIT);
    let prefix = &GALAX_SIT[located[0].data.clone()][..16];

    let mut decoder = Arsenic::decoder();
    let mut scratch = [0u8; 256];

    // The outcome of this call is irrelevant (it may just ask for more
    // input); only the result of finish() is checked.
    let _ = decoder.decode(prefix, &mut scratch);

    assert_eq!(
        decoder.finish(&mut scratch).unwrap_err(),
        Error::UnexpectedEnd
    );
}
#[test]
fn bad_signature_is_corrupt() {
    // 512 bytes of 0xFF: long enough that the decoder cannot run out of
    // input before checking the "As" tag, which must then be rejected as
    // Corrupt (not UnexpectedEnd).
    let bogus = vec![0xFFu8; 512];
    let mut decoder = Arsenic::decoder();
    let mut scratch = [0u8; 256];

    // The rejection may come from decode or only from finish; whichever
    // reports it, the error must be Corrupt and nothing may panic.
    let first_call = decoder.decode(&bogus, &mut scratch);
    let failure = first_call
        .err()
        .unwrap_or_else(|| decoder.finish(&mut scratch).unwrap_err());
    assert_eq!(failure, Error::Corrupt);
}
#[test]
fn encoder_is_unsupported() {
    use compcol::Encoder;

    // Asking the Arsenic encoder to encode anything must fail with
    // Unsupported.
    let mut scratch = [0u8; 16];
    let mut encoder = Arsenic::encoder();
    let failure = encoder.encode(b"hi", &mut scratch).unwrap_err();
    assert_eq!(failure, Error::Unsupported);
}
#[test]
fn factory_registration() {
    // Only meaningful when the optional factory is compiled in; without the
    // `factory` feature this test body is empty.
    #[cfg(feature = "factory")]
    {
        use compcol::factory;

        let by_name = factory::decoder_by_name("arsenic");
        assert!(by_name.is_some());
        assert!(factory::names().contains(&"arsenic"));
        assert_eq!(factory::extension("arsenic"), Some("arsenic"));
    }
}