fstool 0.4.13

Build disk images and filesystems (ext2/3/4, MBR, GPT) from a directory tree and TOML spec, in the spirit of genext2fs.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
//! DMG chunk decoders for the compressed entry types that aren't
//! zero / raw / zlib.
//!
//! Each `Vec`-returning helper takes the raw compressed payload as
//! recorded in the data fork and returns exactly `plain_len` decoded
//! bytes; a payload that decodes to any other length is rejected with
//! [`crate::Error::InvalidImage`]. The callers in [`crate::block::dmg`]
//! derive `plain_len` from `chunk.sector_count * 512`, which is the
//! contract every BLKX entry advertises.
//!
//! ## Codec choices
//!
//! - **bzip2**: `compcol` bzip2 decoder.
//! - **LZFSE**: `compcol` LZFSE decoder.
//! - **LZMA**: `compcol` `.lzma` (alone) decoder over the raw LZMA1
//!   frame (DMG never uses the XZ container).
//! - **ADC**: implemented in this file from the public spec described
//!   in Apple's `imageformat.h`; no external dep.
//!
//! When a build is configured without one of the optional codec deps
//! the corresponding `decode_*` function returns
//! [`crate::Error::Unsupported`] so a slim binary can still open zlib
//! / raw / zero-only images.

use crate::Result;

/// Decode an ADC (Apple Data Compression) stream into the front of `out`.
///
/// ADC is a tiny LZ77 variant. Every token starts with one opcode byte,
/// and this decoder classifies it by value:
///
/// - `0x00..=0x7F` — literal run: `opcode + 1` bytes (1..=128) follow in
///   the stream and are copied verbatim.
/// - `0x80..=0xBF` — short back-reference, one trailing byte `next`:
///   - length   = `((opcode >> 2) & 0x0F) + 3`            (3..=18)
///   - distance = `(((opcode & 0x03) << 8) | next) + 1`   (1..=1024)
/// - `0xC0..=0xFF` — long back-reference, two trailing bytes `hi`, `lo`
///   (big-endian):
///   - length   = `(opcode & 0x3F) + 4`                   (4..=67)
///   - distance = `((hi << 8) | lo) + 1`                  (1..=65536)
///
/// Distances count backward from the current write position. A
/// reference may overlap its own output (`distance < length`); the copy
/// helper handles that by copying one byte at a time, which is how a
/// run such as `aaaa` is expressed as "literal `a`, then distance 1".
///
/// NOTE(review): other open ADC decoders (dmg2img, libdmg-hfsplus)
/// appear to classify opcodes the other way round — high bit set means
/// literal, `0x40` set means three-byte reference, otherwise two-byte
/// reference. Confirm this mapping against a real ADC-compressed image
/// before relying on it.
///
/// # Returns
///
/// The number of bytes written to the start of `out`. Decoding stops
/// when `src` is exhausted, so the count may be smaller than
/// `out.len()`; callers that expect an exact size must compare.
///
/// # Errors
///
/// [`crate::Error::InvalidImage`] when a token is truncated, a literal
/// or reference would write past the end of `out`, or a reference
/// points before the start of the decoded data.
pub fn decode_adc(src: &[u8], out: &mut [u8]) -> Result<usize> {
    let mut rd = 0usize; // index of the next unread byte in `src`
    let mut wr = 0usize; // index of the next unwritten byte in `out`

    while let Some(&op) = src.get(rd) {
        rd += 1;

        match op {
            0x00..=0x7F => {
                // Literal run: the payload must fit in both buffers.
                let len = usize::from(op) + 1;
                let Some(literal) = src.get(rd..rd + len) else {
                    return Err(crate::Error::InvalidImage(format!(
                        "dmg/adc: literal run of {len} bytes runs past source ({} of {} consumed)",
                        rd,
                        src.len()
                    )));
                };
                let end = wr + len;
                if end > out.len() {
                    return Err(crate::Error::InvalidImage(format!(
                        "dmg/adc: literal run of {len} bytes overflows output ({}+{}>{})",
                        wr,
                        len,
                        out.len()
                    )));
                }
                out[wr..end].copy_from_slice(literal);
                rd += len;
                wr = end;
            }
            0x80..=0xBF => {
                // Short reference: 4-bit length, 10-bit distance split
                // between the opcode's low two bits and the next byte.
                let Some(&low) = src.get(rd) else {
                    return Err(crate::Error::InvalidImage(
                        "dmg/adc: short reference truncated".into(),
                    ));
                };
                rd += 1;
                let len = usize::from((op >> 2) & 0x0F) + 3;
                let dist = ((usize::from(op & 0x03) << 8) | usize::from(low)) + 1;
                adc_copy(out, &mut wr, dist, len)?;
            }
            0xC0..=0xFF => {
                // Long reference: 6-bit length, 16-bit big-endian distance.
                let Some(&[hi, lo]) = src.get(rd..rd + 2) else {
                    return Err(crate::Error::InvalidImage(
                        "dmg/adc: long reference truncated".into(),
                    ));
                };
                rd += 2;
                let len = usize::from(op & 0x3F) + 4;
                let dist = ((usize::from(hi) << 8) | usize::from(lo)) + 1;
                adc_copy(out, &mut wr, dist, len)?;
            }
        }
    }

    Ok(wr)
}

/// Apply one ADC back-reference: append `len` bytes to `out` at `*dp`,
/// each read from `dist` bytes behind the position being written.
///
/// The copy runs strictly front-to-back, one byte at a time, so a
/// reference that overlaps its own output (`dist < len`) re-reads bytes
/// it has just written and thereby repeats the preceding pattern.
///
/// On success `*dp` is advanced by `len`. On failure neither `out` nor
/// `*dp` is modified.
///
/// # Errors
///
/// [`crate::Error::InvalidImage`] if `dist` is zero or reaches before
/// the start of `out`, or if the copy would run past the end of `out`.
fn adc_copy(out: &mut [u8], dp: &mut usize, dist: usize, len: usize) -> Result<()> {
    let start = *dp;

    // A valid distance lands on an already-written byte: 1..=start.
    if !(1..=start).contains(&dist) {
        return Err(crate::Error::InvalidImage(format!(
            "dmg/adc: back-reference distance {dist} out of range (dp = {dp})",
            dp = start
        )));
    }

    let end = start + len;
    if end > out.len() {
        return Err(crate::Error::InvalidImage(format!(
            "dmg/adc: back-reference of {len} bytes overflows output ({}+{}>{})",
            start,
            len,
            out.len()
        )));
    }

    // Deliberately not `copy_within`: overlapping ranges must observe
    // bytes written earlier in this same loop.
    for dst in start..end {
        out[dst] = out[dst - dist];
    }

    *dp = end;
    Ok(())
}

/// Decode one in-memory chunk with the compcol codec `A` into a fresh
/// `Vec`, refusing to expand past `plain_len` bytes.
///
/// The size cap is a decompression-bomb guard for the bounded per-chunk
/// case. It is enforced by hand after every decoder call instead of via
/// `LimitedDecoder`: per the original author's note, that wrapper stalls
/// codecs (e.g. bzip2) that need a further call after the last output
/// byte to consume their trailer when the cap is exact.
///
/// Decoding runs in two phases:
///
/// 1. feed `src` through `decode` until the codec reports end-of-stream
///    (return immediately) or that it has run out of input;
/// 2. call `finish` repeatedly until it reports end-of-stream or stops
///    producing output.
///
/// `label` names the codec in error messages. The returned vector may
/// be shorter than `plain_len`; callers check for an exact match.
///
/// # Errors
///
/// [`crate::Error::InvalidImage`] if the codec reports a failure or the
/// output grows beyond `plain_len`.
#[cfg(any(feature = "dmg-bzip2", feature = "dmg-lzfse"))]
fn decode_chunk<A: compcol::Algorithm>(
    src: &[u8],
    plain_len: usize,
    label: &str,
) -> Result<Vec<u8>> {
    use compcol::{Decoder, Status};

    let err = |e| crate::Error::InvalidImage(format!("dmg: {label} chunk decode failed: {e}"));
    let too_big = || {
        crate::Error::InvalidImage(format!(
            "dmg: {label} chunk expanded past {plain_len} bytes"
        ))
    };

    let mut decoder = A::decoder();
    let mut plain = Vec::with_capacity(plain_len);
    // Staging buffer the codec writes into before bytes are appended to
    // `plain`; the cap can therefore be overshot by at most one buffer
    // before it is detected.
    let mut staging = vec![0u8; 64 * 1024];
    let mut fed = 0usize;

    // Phase 1: push the compressed input through the decoder.
    loop {
        let (progress, status) = decoder.decode(&src[fed..], &mut staging).map_err(err)?;
        plain.extend_from_slice(&staging[..progress.written]);
        fed += progress.consumed;
        if plain.len() > plain_len {
            return Err(too_big());
        }
        match status {
            Status::StreamEnd => return Ok(plain),
            Status::InputEmpty => break,
            // Staging buffer filled up: drain it again with the rest of the input.
            Status::OutputFull => {}
        }
    }

    // Phase 2: all input consumed; let the codec flush whatever is left.
    loop {
        let (progress, status) = decoder.finish(&mut staging).map_err(err)?;
        plain.extend_from_slice(&staging[..progress.written]);
        if plain.len() > plain_len {
            return Err(too_big());
        }
        let drained = progress.written == 0 || matches!(status, Status::StreamEnd);
        if drained {
            break;
        }
    }

    Ok(plain)
}

/// Decode a bzip2-compressed DMG chunk into exactly `plain_len` bytes.
///
/// The payload is handed to the bzip2 decoder as-is: DMG bz2 chunks are
/// standalone bzip2 streams (`BZh` magic, no additional framing).
///
/// # Errors
///
/// [`crate::Error::InvalidImage`] if decoding fails, the output would
/// exceed `plain_len`, or the stream ends with fewer than `plain_len`
/// bytes produced.
#[cfg(feature = "dmg-bzip2")]
pub fn decode_bzip2(src: &[u8], plain_len: usize) -> Result<Vec<u8>> {
    let plain = decode_chunk::<compcol::bzip2::Bzip2>(src, plain_len, "bzip2")?;
    match plain.len() {
        n if n == plain_len => Ok(plain),
        n => Err(crate::Error::InvalidImage(format!(
            "dmg: bzip2 chunk inflated to {} bytes but sector_count*512 = {}",
            n, plain_len
        ))),
    }
}

/// Stub when the bzip2 dep was compiled out.
#[cfg(not(feature = "dmg-bzip2"))]
pub fn decode_bzip2(_src: &[u8], _plain_len: usize) -> Result<Vec<u8>> {
    Err(crate::Error::Unsupported(
        "dmg: bzip2 chunks require the `dmg-bzip2` Cargo feature".into(),
    ))
}

/// Decode an LZFSE-compressed DMG chunk into exactly `plain_len` bytes.
///
/// DMG LZFSE chunks are standalone bvxN/bvx2/bvx1/bvx- framed blocks;
/// they are decoded with `compcol::lzfse` (decode-only).
///
/// # Errors
///
/// [`crate::Error::InvalidImage`] if decoding fails, the output would
/// exceed `plain_len`, or the stream ends with fewer than `plain_len`
/// bytes produced.
#[cfg(feature = "dmg-lzfse")]
pub fn decode_lzfse(src: &[u8], plain_len: usize) -> Result<Vec<u8>> {
    let plain = decode_chunk::<compcol::lzfse::Lzfse>(src, plain_len, "lzfse")?;
    match plain.len() {
        n if n == plain_len => Ok(plain),
        n => Err(crate::Error::InvalidImage(format!(
            "dmg: lzfse chunk inflated to {} bytes but sector_count*512 = {}",
            n, plain_len
        ))),
    }
}

/// Stub when the LZFSE dep was compiled out.
#[cfg(not(feature = "dmg-lzfse"))]
pub fn decode_lzfse(_src: &[u8], _plain_len: usize) -> Result<Vec<u8>> {
    Err(crate::Error::Unsupported(
        "dmg: LZFSE chunks require the `dmg-lzfse` Cargo feature".into(),
    ))
}

/// Decode an LZMA-compressed DMG chunk into exactly `plain_len` bytes.
///
/// DMG stores the raw LZMA1 frame — the legacy `.lzma` shape with a
/// 13-byte header carrying properties, dictionary size and uncompressed
/// length — not the XZ container. Decompression is delegated to
/// [`crate::compression::decompress`] with `Algo::Lzma`.
///
/// # Errors
///
/// Propagates any error from the decompressor, and returns
/// [`crate::Error::InvalidImage`] if the decoded length is not
/// `plain_len`.
#[cfg(feature = "lzma")]
pub fn decode_lzma(src: &[u8], plain_len: usize) -> Result<Vec<u8>> {
    let plain = crate::compression::decompress(crate::compression::Algo::Lzma, src, plain_len)?;
    match plain.len() {
        n if n == plain_len => Ok(plain),
        n => Err(crate::Error::InvalidImage(format!(
            "dmg: lzma chunk inflated to {} bytes but sector_count*512 = {}",
            n, plain_len
        ))),
    }
}

/// Stub when the LZMA dep was compiled out.
#[cfg(not(feature = "lzma"))]
pub fn decode_lzma(_src: &[u8], _plain_len: usize) -> Result<Vec<u8>> {
    Err(crate::Error::Unsupported(
        "dmg: LZMA chunks require the `lzma` Cargo feature".into(),
    ))
}

/// Decode an ADC chunk into a vector of exactly `plain_len` bytes.
/// Wraps [`decode_adc`] so the call site mirrors the other codec
/// helpers.
pub fn decode_adc_chunk(src: &[u8], plain_len: usize) -> Result<Vec<u8>> {
    let mut out = vec![0u8; plain_len];
    let n = decode_adc(src, &mut out)?;
    if n != plain_len {
        return Err(crate::Error::InvalidImage(format!(
            "dmg: ADC chunk produced {n} bytes but sector_count*512 = {plain_len}"
        )));
    }
    Ok(out)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap `data` in a single ADC literal token (opcode `len - 1`
    /// followed by the bytes). One token carries 1..=128 bytes, so the
    /// input must be within that range.
    fn literal_token(data: &[u8]) -> Vec<u8> {
        assert!(!data.is_empty() && data.len() <= 128);
        let mut token = vec![(data.len() - 1) as u8];
        token.extend_from_slice(data);
        token
    }

    /// Decode `stream` into a zeroed buffer of `capacity` bytes and
    /// return `(bytes produced, buffer)`. Panics if decoding fails.
    fn decode_ok(stream: &[u8], capacity: usize) -> (usize, Vec<u8>) {
        let mut buf = vec![0u8; capacity];
        let produced = decode_adc(stream, &mut buf).unwrap();
        (produced, buf)
    }

    #[test]
    fn adc_literal_only() {
        let text = b"hello world";
        let (produced, buf) = decode_ok(&literal_token(text), text.len());
        assert_eq!(produced, text.len());
        assert_eq!(&buf[..], text);
    }

    #[test]
    fn adc_long_literal_run() {
        // 128 bytes is the most a single literal opcode can carry.
        let bytes: Vec<u8> = (0..128u8).collect();
        let (produced, buf) = decode_ok(&literal_token(&bytes), 128);
        assert_eq!(produced, 128);
        assert_eq!(buf, bytes);
    }

    #[test]
    fn adc_short_reference() {
        // Literal "ABCD", then a short reference with length 4 and
        // distance 4, which must append a second "ABCD".
        //   opcode   = 0x80 | ((len - 3) << 2) | ((dist - 1) >> 8)
        //            = 0x80 | (1 << 2) | 0 = 0x84
        //   trailing = (dist - 1) & 0xFF = 3
        let mut stream = literal_token(b"ABCD");
        stream.extend_from_slice(&[0x84, 0x03]);
        let (produced, buf) = decode_ok(&stream, 8);
        assert_eq!(produced, 8);
        assert_eq!(&buf[..], b"ABCDABCD");
    }

    #[test]
    fn adc_short_reference_overlap_run() {
        // Literal "a", then a short reference with distance 1 and
        // length 3. The reference overlaps its own output and must
        // expand to "aaaa". opcode = 0x80 | ((3 - 3) << 2) | 0 = 0x80,
        // trailing byte = 0.
        let mut stream = literal_token(b"a");
        stream.extend_from_slice(&[0x80, 0x00]);
        let (produced, buf) = decode_ok(&stream, 4);
        assert_eq!(produced, 4);
        assert_eq!(&buf[..], b"aaaa");
    }

    #[test]
    fn adc_long_reference() {
        // 200 literal bytes — 100 x 0xAA then 100 x 0xBB, split into two
        // runs because one literal opcode tops out at 128 bytes —
        // followed by a long reference with length 67 and distance 200,
        // i.e. a copy of the first 67 bytes of the buffer.
        let mut stream = Vec::new();
        stream.push(99u8); // literal run of 100 bytes
        stream.extend_from_slice(&[0xAA; 100]);
        stream.push(99u8); // literal run of 100 bytes
        stream.extend_from_slice(&[0xBB; 100]);

        // opcode = 0xC0 | (len - 4) = 0xC0 | 63 = 0xFF;
        // dist - 1 = 199 = 0x00C7, big-endian.
        stream.extend_from_slice(&[0xC0 | 63, 0x00, 0xC7]);

        let (produced, buf) = decode_ok(&stream, 267);
        assert_eq!(produced, 267);
        assert!(buf[..100].iter().all(|&b| b == 0xAA));
        assert!(buf[100..200].iter().all(|&b| b == 0xBB));
        // The referenced bytes come from the very start of the buffer.
        assert!(buf[200..267].iter().all(|&b| b == 0xAA));
    }

    #[test]
    fn adc_rejects_distance_zero() {
        // The encoding stores `distance - 1`, so a literal distance of
        // zero cannot be expressed. What this test exercises is the
        // neighbouring failure: a reference issued before any output
        // exists (write position 0), which has nothing to point at and
        // must be rejected.
        let stream = [0x80u8, 0x00];
        let mut buf = vec![0u8; 4];
        let err = decode_adc(&stream, &mut buf).unwrap_err();
        assert!(
            matches!(err, crate::Error::InvalidImage(_)),
            "expected InvalidImage, got {err:?}"
        );
    }

    #[test]
    fn adc_rejects_truncated_short_reference() {
        // A short-reference opcode with its trailing byte missing.
        let mut buf = vec![0u8; 4];
        assert!(decode_adc(&[0x80], &mut buf).is_err());
    }

    #[test]
    fn adc_rejects_truncated_long_reference() {
        // A long-reference opcode followed by one byte instead of two.
        let mut buf = vec![0u8; 4];
        assert!(decode_adc(&[0xC0, 0x00], &mut buf).is_err());
    }

    #[test]
    fn adc_rejects_literal_overrun() {
        // The opcode announces 5 literal bytes; only 2 follow.
        let mut buf = vec![0u8; 8];
        assert!(decode_adc(&[0x04, 0x01, 0x02], &mut buf).is_err());
    }

    #[cfg(feature = "dmg-bzip2")]
    #[test]
    fn bzip2_roundtrip_via_libbz2_independent_vector() {
        // bzip2 stream of the ASCII text "hello world\n", produced by
        // the reference bzip2 CLI and inlined here so the test needs no
        // encoder of its own.
        const COMPRESSED: &[u8] = &[
            0x42, 0x5A, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26, 0x53, 0x59, 0x4E, 0xEC, 0xE8, 0x36,
            0x00, 0x00, 0x02, 0x51, 0x80, 0x00, 0x10, 0x40, 0x00, 0x06, 0x44, 0x90, 0x80, 0x20,
            0x00, 0x31, 0x06, 0x4C, 0x41, 0x01, 0xA7, 0xA9, 0xA5, 0x80, 0xBB, 0x94, 0x31, 0xF8,
            0xBB, 0x92, 0x29, 0xC2, 0x84, 0x82, 0x77, 0x67, 0x41, 0xB0,
        ];
        let expected = b"hello world\n";
        let decoded = decode_bzip2(COMPRESSED, expected.len()).unwrap();
        assert_eq!(decoded, expected);
    }

    #[cfg(feature = "lzma")]
    #[test]
    fn lzma_roundtrip() {
        // Compress to a raw LZMA1 (`.lzma` alone) stream with the
        // crate's own encoder, then decode it again via `decode_lzma`.
        let original = b"the quick brown fox jumps over the lazy dog".repeat(8);
        let packed =
            crate::compression::compress(crate::compression::Algo::Lzma, &original).unwrap();
        let decoded = decode_lzma(&packed, original.len()).unwrap();
        assert_eq!(decoded, original);
    }
}