Skip to main content

dar_forensic/
lib.rs

1//! Pure-Rust reader for Denis Corbin DAR (Disk ARchiver) archives.
2//!
3//! Supports DAR formats 7–11 (produced by dar 2.3–2.8) and the legacy ≤7 grammar.
4//! Passware Kit Mobile produces format-9 archives; dar 2.8.5 produces 11.3.
5//! Entries and the catalogue compressed with gzip, bzip2 or xz are transparently
6//! decompressed (pure-Rust); lzo, zstd, lz4 and encryption are not decoded.
7//!
8//! ## Format sketch
9//!
10//! ```text
11//! Slice header:
12//!   [4]  magic = 00 00 00 7b  (SAUV_MAGIC_NUMBER = 123, big-endian u32)
13//!   [10] internal_name label
14//!   [1]  flag  [1]  ext_char
15//!   TLV list:  infinint(count) + count × (u16 type + infinint len + data)
16//!   ← archive_origin: all catalog archive_offset values are relative to here
17//!
18//! Archive body:
19//!   escaped sequences (seqt_file, seqt_saved, …) + raw file bytes
20//!
21//! Catalog  (located by seqt_catalogue escape: AD FD EA 77 21 43):
22//!   [10] label  +  (NUL working-dir path, format 11.1+ only)  +  entries
23//!
24//!   Each entry: cat_sig byte where (cat_sig & 0x1f | 0x60) gives type
25//!     'd' directory  → NUL-name + inode [+ FSA]  (push to dir stack)
26//!     'f' file       → NUL-name + inode [+ FSA] + file-specific fields
27//!     'z' EOD        → pop dir stack; depth=0 → done
28//! ```
29//!
30//! ## Key non-obvious invariants
31//!
32//! - **Infinint**: variable-length. The common form is 5 bytes
33//!   (`0x80 XX XX XX XX`, a big-endian u32); timestamps past 2^32 use the
34//!   9-byte `0x40` form (big-endian u64). Encodings wider than 64 bits are
35//!   rejected as corrupt — this reader decodes to `u64` or errors, never
36//!   truncates.
37//! - **Permissions**: 2-byte big-endian u16, *not* an infinint.
//! - **Timestamps**: formats ≤ 8 store a bare seconds infinint; formats 9+ prefix
//!   a unit byte (`'s'`/`'u'`/`'n'`) and add a sub-second infinint for `'u'`/`'n'`.
40//! - **FSA** (format 9+ only): inode flag bit `0x10` (FSA-full) adds inode
41//!   infinints and an FSA block; format 8 has no FSA.
42//! - **archive_offset**: points *directly* to the raw file bytes, not to the
43//!   data-section header that precedes them in the body stream.
44//!   `seek(archive_origin + archive_offset)` then `read(stored_size)`.
45//!
46//! Full format notes: `docs/implementation-notes.md`.
47
48use std::io::{Cursor, Read, Seek, SeekFrom, Write};
49
50use thiserror::Error;
51
/// DAR slice magic: `SAUV_MAGIC_NUMBER` (123) serialised as a big-endian u32,
/// i.e. the bytes `00 00 00 7b`.
const DAR_MAGIC: [u8; 4] = 123u32.to_be_bytes();
54
/// Cap (512 MiB) on the compressed catalogue bytes read from the archive tail.
const MAX_CATALOGUE_COMPRESSED: u64 = 512 << 20;
/// Cap (1 GiB) on the inflated catalogue, guarding against a decompression
/// bomb. Per-file streams need no such constant — they are bounded by the
/// entry's declared size.
const MAX_CATALOGUE_INFLATED: u64 = 1 << 30;
60
/// Escape sequence marking the catalog: `AD FD EA 77 21 43`.
///
/// The catalogue scan searches the archive for this exact 6-byte pattern.
const SEQT_CATALOGUE: [u8; 6] = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43];
63
/// Encoded value of archive format 11.1 (`major * 256 + fix`), the first
/// format whose catalog header carries an in-place (working-directory) path.
/// Formats 8, 9, 10 and 11.0 have no such field.
const FORMAT_11_1: u32 = (11 << 8) | 1;
68
/// Errors returned by [`DarReader`].
#[derive(Debug, Error)]
pub enum DarError {
    /// An underlying read or seek failed; produced automatically by `?` on
    /// `std::io::Error`.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// The stream is too short to hold the 4-byte magic, or the magic does not
    /// match.
    #[error("not a DAR archive")]
    NotADar,
    /// The archive is structurally invalid or uses a feature this reader does
    /// not decode (the message says which).
    #[error("corrupt archive: {0}")]
    Corrupt(String),
    /// No catalogue entry has the requested path.
    #[error("entry not found: '{0}'")]
    EntryNotFound(String),
}
81
/// Metadata about one archived file.
#[derive(Debug, Clone)]
pub struct DarEntry {
    /// Path inside the archive: directory names joined with `/`, without the
    /// virtual `<ROOT>` directory.
    pub path: String,
    /// Uncompressed size in bytes, as declared by the catalogue.
    pub size: u64,
}
88
/// Internal catalogue record: everything needed to locate and decode one file.
#[derive(Debug, Clone)]
struct EntryRef {
    /// Path inside the archive (`/`-joined directory names plus file name).
    path: String,
    /// Uncompressed size declared by the catalogue.
    size: u64,
    /// Offset of the file's bytes, relative to the reader's `archive_origin`.
    archive_offset: u64,
    /// Number of bytes stored in the archive for this file.
    stored_size: u64,
    /// libdar compression char for this file (`b'n'` = stored).
    compression: u8,
    /// True when the per-file encryption byte is non-zero.
    encrypted: bool,
}
98
/// Read-only DAR archive reader.
pub struct DarReader<R: Read + Seek> {
    /// Underlying archive stream; file bytes are read from it on demand.
    inner: R,
    /// Byte position immediately after the slice header TLV block.
    /// `archive_origin + archive_offset` = absolute position of raw file bytes.
    archive_origin: u64,
    /// File entries parsed from the catalogue when the archive was opened.
    entries: Vec<EntryRef>,
}
107
108impl<R: Read + Seek> DarReader<R> {
109    /// Open a DAR archive, validating the magic and loading the catalog.
110    pub fn open(mut reader: R) -> Result<Self, DarError> {
111        let mut magic = [0u8; 4];
112        reader
113            .read_exact(&mut magic)
114            .map_err(|_| DarError::NotADar)?;
115        if magic != DAR_MAGIC {
116            return Err(DarError::NotADar);
117        }
118
119        let mut label = [0u8; 10];
120        reader.read_exact(&mut label)?; // internal_name label
121        let _flag = read_u8(&mut reader)?; // slice flag ('T' terminal / 'N' / 'E')
122        let extension = read_u8(&mut reader)?; // 'T' = TLV (format 8+); 'N'/'S' = legacy (<= 7)
123
124        // Format 8+ carries a TLV list and a `seqt_catalogue` escape; format <= 7
125        // has neither — its catalogue is located via the end `terminateur` trailer
126        // (libdar header.cpp extension handling; terminateur.cpp).
127        let entries;
128        let archive_origin;
129        if extension == b'T' {
130            // TLV list: infinint(count) then count × (u16 type + infinint len + data)
131            let tlv_count = read_infinint(&mut reader).map_err(|e| match e {
132                DarError::Io(_) => DarError::Corrupt("truncated TLV block".into()),
133                other => other,
134            })?;
135            for _ in 0..tlv_count {
136                skip(&mut reader, 2)?;
137                let len = read_infinint(&mut reader)?;
138                skip(&mut reader, len)?;
139            }
140
141            archive_origin = reader.stream_position()?;
142            let format_value = read_format_value(&mut reader);
143            // The archive's global compression algorithm is the byte immediately
144            // after the version string; it tells us whether (and how) the
145            // catalogue stream is compressed. Unreadable → treat as stored.
146            let global_comp = read_u8(&mut reader).unwrap_or(b'n');
147            reader.seek(SeekFrom::Start(archive_origin))?;
148
149            // true → seqt_catalogue tape mark found (catalog has label + maybe path);
150            // false → located by its ref_data_name label (tape marks off, e.g. Passware).
151            let via_escape = find_catalogue(&mut reader, &label)?;
152            let format_major = format_value >> 8;
153            if via_escape && is_compressed(global_comp) {
154                // The catalogue is a single stream compressed with the archive
155                // codec, beginning right after the seqt_catalogue escape and
156                // running to the trailer. Inflate it, then parse from the
157                // plaintext buffer — which begins with the in-catalog label and
158                // optional in-place path, exactly like the uncompressed case.
159                let mut compressed = Vec::new();
160                reader
161                    .by_ref()
162                    .take(MAX_CATALOGUE_COMPRESSED)
163                    .read_to_end(&mut compressed)?;
164                let inflated = decompress(&compressed, global_comp, MAX_CATALOGUE_INFLATED)?;
165                let mut cur = Cursor::new(inflated);
166                skip(&mut cur, 10)?; // catalog label
167                if format_value >= FORMAT_11_1 {
168                    skip_nul_string(&mut cur)?;
169                }
170                entries = parse_catalog(&mut cur, format_major)?;
171            } else {
172                if via_escape {
173                    skip(&mut reader, 10)?; // catalog label
174                                            // The in-place path exists only from format 11.1
175                                            // (catalogue.cpp:157). Formats 8/9/10/11.0 have none.
176                    if format_value >= FORMAT_11_1 {
177                        skip_nul_string(&mut reader)?;
178                    }
179                }
180                entries = parse_catalog(&mut reader, format_major)?;
181            }
182        } else if extension == b'N' || extension == b'S' {
183            if extension == b'S' {
184                read_infinint(&mut reader)?; // slice size (multi-slice header); unused
185            }
186            archive_origin = reader.stream_position()?;
187            let format_value = read_format_value(&mut reader); // 3-byte edition: value = major*256
188            let cat_offset = read_terminateur(&mut reader)?;
189            let cat_start = archive_origin
190                .checked_add(cat_offset)
191                .ok_or_else(|| DarError::Corrupt("catalogue offset overflows".into()))?;
192            let end = reader.seek(SeekFrom::End(0))?;
193            if cat_start >= end {
194                return Err(DarError::Corrupt(format!(
195                    "catalogue start {cat_start} past archive end {end}"
196                )));
197            }
198            reader.seek(SeekFrom::Start(cat_start))?;
199            // Legacy catalogue: no 10-byte label, no path — entries begin here.
200            entries = parse_catalog(&mut reader, format_value >> 8)?;
201        } else {
202            return Err(DarError::Corrupt(format!(
203                "unknown slice-header extension {extension:#04x}"
204            )));
205        }
206
207        Ok(Self {
208            inner: reader,
209            archive_origin,
210            entries,
211        })
212    }
213
214    /// List all archived file entries (path and uncompressed size).
215    pub fn entries(&self) -> Vec<DarEntry> {
216        self.entries
217            .iter()
218            .map(|e| DarEntry {
219                path: e.path.clone(),
220                size: e.size,
221            })
222            .collect()
223    }
224
225    /// Extract a file by path, returning its raw bytes.
226    pub fn extract(&mut self, path: &str) -> Result<Vec<u8>, DarError> {
227        let entry = self
228            .entries
229            .iter()
230            .find(|e| e.path == path)
231            .ok_or_else(|| DarError::EntryNotFound(path.to_string()))?
232            .clone();
233
234        if entry.encrypted {
235            return Err(DarError::Corrupt(format!("'{path}' is encrypted")));
236        }
237
238        // The raw bytes live at archive_origin + archive_offset.  Both fields
239        // are attacker-controlled, so the sum must be checked, and the claimed
240        // size validated against the bytes that actually exist before any
241        // allocation — otherwise a forged stored_size is an allocation bomb.
242        let start = self
243            .archive_origin
244            .checked_add(entry.archive_offset)
245            .ok_or_else(|| {
246                DarError::Corrupt(format!("'{path}' archive offset overflows file position"))
247            })?;
248        let end = self.inner.seek(SeekFrom::End(0))?;
249        if start > end {
250            return Err(DarError::Corrupt(format!(
251                "'{path}' starts at {start}, past archive end {end}"
252            )));
253        }
254        let available = end - start;
255        if entry.stored_size > available {
256            return Err(DarError::Corrupt(format!(
257                "'{path}' claims {} stored bytes but only {available} remain",
258                entry.stored_size
259            )));
260        }
261
262        self.inner.seek(SeekFrom::Start(start))?;
263        let mut data = vec![0u8; entry.stored_size as usize];
264        self.inner.read_exact(&mut data)?;
265
266        if !is_compressed(entry.compression) {
267            return Ok(data);
268        }
269        // Each compressed entry is an independent stream; its uncompressed length
270        // is the catalog `size`, so decode exactly that and reject any mismatch —
271        // a forged stream cannot over-inflate past the declared size.
272        let out = decompress(&data, entry.compression, entry.size)?;
273        if out.len() as u64 != entry.size {
274            return Err(DarError::Corrupt(format!(
275                "'{path}' decompressed to {} bytes but catalog declares {}",
276                out.len(),
277                entry.size
278            )));
279        }
280        Ok(out)
281    }
282}
283
284// ── Catalog parser ────────────────────────────────────────────────────────────
285
/// On archives larger than this, the catalog scan starts this many bytes
/// before EOF (the catalog always lives at the tail), avoiding a full read of
/// a multi-gigabyte forensic archive before falling back to a full scan.
const TAIL_SCAN: u64 = 256 * 1024 * 1024;

/// Number of bytes requested per read while scanning for the catalogue markers.
const CHUNK: usize = 4 * 1024 * 1024;
/// Bytes carried from the end of one chunk to the start of the next so a
/// marker straddling the boundary is still seen whole:
/// `max(SEQT_CATALOGUE.len(), label.len()) - 1 = max(6, 10) - 1`.
const OVERLAP: usize = 9;
294
295/// Scan forward from the current reader position searching for either the
296/// `seqt_catalogue` escape or the archive `label`.
297///
298/// Returns `Some(true)` if the escape was found (reader positioned just after it),
299/// `Some(false)` if the label was found (reader positioned just after it),
300/// `None` if EOF was reached without a match.
301fn scan_window<R: Read + Seek>(
302    r: &mut R,
303    label: &[u8; 10],
304    use_label: bool,
305) -> Result<Option<bool>, DarError> {
306    let mut buf = vec![0u8; CHUNK + OVERLAP];
307    let mut overlap_len: usize = 0;
308    loop {
309        let chunk_file_pos = r.stream_position()?;
310        let n = r.read(&mut buf[overlap_len..overlap_len + CHUNK])?;
311        if n == 0 {
312            break;
313        }
314        let total = overlap_len + n;
315        // buf[0..overlap_len]  → tail of previous chunk (file pos: chunk_file_pos - overlap_len)
316        // buf[overlap_len..total] → newly read bytes
317        let buf_base = chunk_file_pos - overlap_len as u64;
318
319        if let Some(i) = buf[..total]
320            .windows(SEQT_CATALOGUE.len())
321            .position(|w| w == SEQT_CATALOGUE)
322        {
323            r.seek(SeekFrom::Start(
324                buf_base + i as u64 + SEQT_CATALOGUE.len() as u64,
325            ))?;
326            return Ok(Some(true));
327        }
328        if use_label {
329            if let Some(i) = buf[..total]
330                .windows(label.len())
331                .position(|w| w == label.as_ref())
332            {
333                r.seek(SeekFrom::Start(buf_base + i as u64 + label.len() as u64))?;
334                return Ok(Some(false));
335            }
336        }
337
338        let keep = OVERLAP.min(total);
339        buf.copy_within(total - keep..total, 0);
340        overlap_len = keep;
341    }
342    Ok(None)
343}
344
345/// Locate the catalog section and position the reader at its first entry.
346///
347/// Returns `true` when the `seqt_catalogue` escape is found — the caller then
348/// skips the 10-byte in-catalog label and (format 11.1+) the path NUL string.
349/// The escape is a *sequential-read tape mark*; it is present only when the
350/// archive was written with tape marks (libdar's default).
351///
352/// Returns `false` when the catalog is located by its `ref_data_name` label
353/// directly. Archives written with tape marks disabled (e.g. by Passware Kit
354/// Mobile, equivalent to `dar -at`) omit the escape; their catalog still begins
355/// with the 10-byte `ref_data_name`, which equals the slice `label`, so scanning
356/// for `label` in the tail finds it — a structural marker, not a heuristic.
357///
358/// Returns `Err(Corrupt)` when neither marker is found.
359///
360/// Strategy: DAR catalogs always live at the tail of the archive.  On forensic
361/// archives ≥ 256 MiB we jump straight to the last 256 MiB and scan forward
362/// from there, then fall back to a full forward scan from `archive_origin` if
363/// needed.  This reduces the I/O for a 92 GiB archive from ~99 GiB to ~107 MiB.
364fn find_catalogue<R: Read + Seek>(r: &mut R, label: &[u8; 10]) -> Result<bool, DarError> {
365    find_catalogue_within(r, label, TAIL_SCAN)
366}
367
368/// Implementation of [`find_catalogue`] with the tail-scan window size as a
369/// parameter so the full-scan fallback can be exercised without a 256 MiB
370/// fixture.
371fn find_catalogue_within<R: Read + Seek>(
372    r: &mut R,
373    label: &[u8; 10],
374    tail_scan: u64,
375) -> Result<bool, DarError> {
376    // All-zero labels cannot be used as a reliable catalog marker (too common
377    // in zero-padded archive bodies).
378    let use_label = !label.iter().all(|&b| b == 0);
379
380    let archive_origin = r.stream_position()?;
381    let file_end = r.seek(SeekFrom::End(0))?;
382
383    if file_end <= archive_origin {
384        return Err(DarError::Corrupt("archive body too short".into()));
385    }
386
387    // Jump to at most tail_scan bytes before end; for small files this equals archive_origin.
388    let tail_start = archive_origin.max(file_end.saturating_sub(tail_scan));
389    r.seek(SeekFrom::Start(tail_start))?;
390
391    if let Some(result) = scan_window(r, label, use_label)? {
392        return Ok(result);
393    }
394
395    // Tail scan missed.  Fall back to a full scan from archive_origin.
396    if tail_start > archive_origin {
397        r.seek(SeekFrom::Start(archive_origin))?;
398        if let Some(result) = scan_window(r, label, use_label)? {
399            return Ok(result);
400        }
401    }
402
403    Err(DarError::Corrupt("seqt_catalogue not found".into()))
404}
405
406/// Read the NUL-terminated `version_string` at the current position and return
407/// `archive_version::value()` = `major*256 + fix`, where `major = b0*256 + b1`
408/// and each byte is `value + 48`. Format <= 7 stores only `"NN"` (fix implicitly
409/// 0); format 8+ stores `"NNf"`. Returns `u32::MAX` for an unreadable string so
410/// an unknown future format is treated as newest.
411fn read_format_value<R: Read>(r: &mut R) -> u32 {
412    let s = read_nul_string(r).unwrap_or_default();
413    let b = s.as_bytes();
414    if b.len() >= 2 {
415        let major = u32::from(b[0].saturating_sub(48)) * 256 + u32::from(b[1].saturating_sub(48));
416        let fix = if b.len() >= 3 {
417            u32::from(b[2].saturating_sub(48))
418        } else {
419            0
420        };
421        major * 256 + fix
422    } else {
423        u32::MAX
424    }
425}
426
/// Report whether `algo` is a libdar compression char naming a real codec.
///
/// The letter is compared case-insensitively (`compression2char` emits
/// lowercase for streamed mode and uppercase for per-block mode):
/// `z`=gzip, `y`=bzip2, `x`=xz, `l`/`j`/`k`=lzo variants, `d`=zstd, `q`=lz4.
/// `n` (stored) and every other byte — e.g. a header placeholder in a
/// non-dar-produced archive — count as not compressed, so the data is read
/// verbatim rather than mis-decoded.
fn is_compressed(algo: u8) -> bool {
    const CODEC_LETTERS: &[u8] = b"zyxljkdq";
    CODEC_LETTERS.contains(&algo.to_ascii_lowercase())
}
439
440/// Inflate one compressed stream, dispatching on the libdar codec char and
441/// rejecting output longer than `max_out` (decompression-bomb guard). Trailing
442/// bytes after the stream (e.g. the archive trailer) are ignored by the decoder.
443fn decompress(data: &[u8], algo: u8, max_out: u64) -> Result<Vec<u8>, DarError> {
444    match algo.to_ascii_lowercase() {
445        // dar's "gzip" is a raw zlib stream (78 xx), not a gzip (1f 8b) wrapper.
446        b'z' => read_bounded(flate2::read::ZlibDecoder::new(data), max_out, "zlib"),
447        b'y' => read_bounded(bzip2_rs::DecoderReader::new(data), max_out, "bzip2"),
448        b'x' => {
449            // lzma-rs is writer-driven and has no output cap, so a BoundedWriter
450            // enforces the same decompression-bomb guard the Read codecs get.
451            let mut input: &[u8] = data;
452            let mut out = BoundedWriter {
453                buf: Vec::new(),
454                max: max_out,
455            };
456            match lzma_rs::xz_decompress(&mut input, &mut out) {
457                Ok(()) => Ok(out.buf),
458                // The DAR trailer follows the catalogue's xz stream. lzma-rs
459                // fully decodes and validates the stream (blocks, index, CRC,
460                // footer magic) before rejecting trailing bytes, so on this one
461                // error the output is already complete and sound. Per-file
462                // extract passes exactly stored_size bytes and never trails.
463                // (String coupling is why lzma-rs is pinned to 0.3.x.)
464                Err(lzma_rs::error::Error::XzError(ref m))
465                    if m == "Unexpected data after last XZ block" =>
466                {
467                    Ok(out.buf)
468                }
469                Err(e) => Err(DarError::Corrupt(format!("xz decode failed: {e}"))),
470            }
471        }
472        other => Err(DarError::Corrupt(format!(
473            "unsupported compression '{}'",
474            other as char
475        ))),
476    }
477}
478
/// In-memory `Write` sink with a hard size limit: it accepts data until the
/// buffered total would exceed `max`, then fails the write. Used to cap the
/// output of a writer-driven decoder (lzma-rs) against a decompression bomb.
struct BoundedWriter {
    /// Bytes accepted so far.
    buf: Vec<u8>,
    /// Maximum number of bytes `buf` may hold.
    max: u64,
}

impl Write for BoundedWriter {
    /// Append `data` in full, or reject it entirely if it would push the
    /// buffer past `max` (nothing is buffered on rejection).
    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
        let incoming = data.len();
        let projected = self.buf.len() as u64 + incoming as u64;
        if projected > self.max {
            return Err(std::io::Error::other("decompressed data exceeds bound"));
        }
        self.buf.extend_from_slice(data);
        Ok(incoming)
    }

    /// Nothing to flush: every accepted byte is already in `buf`.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
499
500/// Read a decoder to EOF, capping output at `max_out` bytes (one extra byte is
501/// requested so an over-long stream is detected, not silently truncated).
502fn read_bounded<R: Read>(decoder: R, max_out: u64, what: &str) -> Result<Vec<u8>, DarError> {
503    let mut out = Vec::new();
504    decoder
505        .take(max_out.saturating_add(1))
506        .read_to_end(&mut out)
507        .map_err(|e| DarError::Corrupt(format!("{what} decode failed: {e}")))?;
508    if out.len() as u64 > max_out {
509        return Err(DarError::Corrupt("decompressed data exceeds bound".into()));
510    }
511    Ok(out)
512}
513
514/// Locate the catalogue in a pre-format-8 archive via the end `terminateur`
515/// trailer (libdar terminateur.cpp:95-138), returning the catalogue start offset
516/// relative to `archive_origin`.
517///
518/// From EOF, count trailing `0xFF` padding bytes (8 bits each); the first
519/// non-`0xFF` byte encodes the remaining count in unary as its set high bits.
520/// `byte_offset = total_bits * 4` is the distance back from that byte to the
521/// catalogue-position infinint. The `0xFF` run is bounded so a hostile all-`0xFF`
522/// tail cannot spin or overflow.
523fn read_terminateur<R: Read + Seek>(r: &mut R) -> Result<u64, DarError> {
524    const BLOCK_SIZE: u64 = 4;
525    const MAX_BITS: u64 = 4096; // far beyond any real terminator
526
527    let mut pos = r.seek(SeekFrom::End(0))?;
528    let mut bits: u64 = 0;
529    let terminal = loop {
530        if pos == 0 {
531            return Err(DarError::Corrupt("terminator underflows archive".into()));
532        }
533        pos -= 1;
534        r.seek(SeekFrom::Start(pos))?;
535        let b = read_u8(r)?;
536        if b == 0xFF {
537            bits += 8;
538            if bits > MAX_BITS {
539                return Err(DarError::Corrupt("terminator padding too long".into()));
540            }
541        } else {
542            break b;
543        }
544    };
545    // The terminator byte must have its top bit set; count consecutive set MSBs.
546    if terminal & 0x80 == 0 {
547        return Err(DarError::Corrupt(format!(
548            "invalid terminator byte {terminal:#04x}"
549        )));
550    }
551    let mut x = terminal;
552    while x != 0 {
553        if x & 0x80 == 0 {
554            return Err(DarError::Corrupt("malformed terminator bit run".into()));
555        }
556        bits += 1;
557        x <<= 1;
558    }
559    let byte_offset = bits * BLOCK_SIZE;
560    let infinint_start = pos
561        .checked_sub(byte_offset)
562        .ok_or_else(|| DarError::Corrupt("terminator offset underflows".into()))?;
563    r.seek(SeekFrom::Start(infinint_start))?;
564    read_infinint(r)
565}
566
567/// Parse all catalog entries, returning file entries with their extraction info.
568///
569/// Stops when the root directory is closed (depth reaches zero) or an unknown
570/// entry type is encountered (slice trailer).
571fn parse_catalog<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<Vec<EntryRef>, DarError> {
572    let mut entries = Vec::new();
573    let mut dir_stack: Vec<String> = Vec::new();
574    let mut depth: u32 = 0;
575
576    loop {
577        let mut buf = [0u8; 1];
578        match r.read_exact(&mut buf) {
579            Ok(()) => {}
580            Err(_) => break,
581        }
582
583        // Lower 5 bits of cat_sig + 0x60 gives the ASCII type letter.
584        let entry_type = ((buf[0] & 0x1f) | 0x60) as char;
585
586        match entry_type {
587            'z' => {
588                // End of directory
589                depth = depth.saturating_sub(1);
590                dir_stack.pop();
591                if depth == 0 {
592                    break;
593                }
594            }
595            'd' => {
596                let name = read_nul_string(r)?;
597                let flags = read_inode_base(r, format_major)?;
598                if format_major >= 9 && (flags >> 4) & 1 != 0 {
599                    skip_fsa(r)?;
600                }
601                depth += 1;
602                // <ROOT> is a virtual root; don't include it in file paths.
603                if name != "<ROOT>" {
604                    dir_stack.push(name);
605                }
606            }
607            'f' => {
608                let name = read_nul_string(r)?;
609                let flags = read_inode_base(r, format_major)?;
610                if format_major >= 9 && (flags >> 4) & 1 != 0 {
611                    skip_fsa(r)?;
612                }
613
614                let size = read_infinint(r)?;
615                let archive_offset = read_infinint(r)?;
616                let mut stored_size = read_infinint(r)?;
617                // Format <= 7 has no per-file encryption/compression bytes and a
618                // fixed 2-byte CRC (no length prefix); format 8+ stores both bytes
619                // and a length-prefixed CRC (libdar cat_file.cpp:160-252, crc.cpp).
620                let (encryption_flag, compression) = if format_major >= 8 {
621                    (read_u8(r)?, read_u8(r)?)
622                } else {
623                    (0u8, b'n')
624                };
625                if format_major >= 8 {
626                    let crc_size = read_infinint(r)?;
627                    skip(r, crc_size)?;
628                } else {
629                    skip(r, 2)?; // fixed 2-byte CRC
630                }
631                // Pre-8: storage_size 0 means the data is stored uncompressed.
632                if format_major <= 7 && stored_size == 0 {
633                    stored_size = size;
634                }
635
636                let path = if dir_stack.is_empty() {
637                    name
638                } else {
639                    format!("{}/{}", dir_stack.join("/"), name)
640                };
641
642                entries.push(EntryRef {
643                    path,
644                    size,
645                    archive_offset,
646                    stored_size,
647                    compression,
648                    encrypted: encryption_flag != 0,
649                });
650            }
651            'l' => {
652                // Symbolic link: inode + NUL-terminated target path; not extractable.
653                let _name = read_nul_string(r)?;
654                let flags = read_inode_base(r, format_major)?;
655                if format_major >= 9 && (flags >> 4) & 1 != 0 {
656                    skip_fsa(r)?;
657                }
658                skip_nul_string(r)?; // symlink target
659            }
660            _ => break, // unknown type = slice trailer or unhandled entry
661        }
662    }
663
664    Ok(entries)
665}
666
667// ── Low-level I/O helpers ─────────────────────────────────────────────────────
668
669/// Read a DAR variable-length infinint, decoded to `u64`.
670///
671/// Format (TG=4): optional leading `0x00` skip-bytes, then a terminal byte
672/// with exactly one bit set; `pos = terminal.leading_zeros()` and the value
673/// occupies `(skip_count * 8 + pos + 1) * 4` big-endian bytes.
674///
675/// A `u64` holds at most 8 data bytes.  Any encoding wider than that — i.e.
676/// *any* leading `0x00` (which alone implies ≥ 36 bytes) or a terminal below
677/// `0x40` (`pos > 1`) — cannot be represented and is rejected as `Corrupt`
678/// rather than silently truncated.  This single bound also removes the
679/// `(skip * 8 …)` arithmetic-overflow panic and caps the leading-zero scan, so
680/// a malicious all-zero run can never spin or overflow the skip counter.
681fn read_infinint<R: Read>(r: &mut R) -> Result<u64, DarError> {
682    let terminal = read_u8(r)?;
683    if terminal == 0x00 {
684        // A skip-byte group is at least 36 data bytes — far beyond u64.
685        return Err(DarError::Corrupt(
686            "infinint exceeds 64-bit range (multi-group encoding)".into(),
687        ));
688    }
689    if terminal.count_ones() != 1 {
690        return Err(DarError::Corrupt(format!(
691            "invalid infinint terminal: {terminal:#04x}"
692        )));
693    }
694    let pos = terminal.leading_zeros(); // 0 ..= 7
695    if pos > 1 {
696        // data_bytes = (pos + 1) * 4 > 8 → does not fit in u64.
697        return Err(DarError::Corrupt(format!(
698            "infinint exceeds 64-bit range: terminal {terminal:#04x} implies {} bytes",
699            (pos + 1) * 4
700        )));
701    }
702    let data_bytes = (pos + 1) * 4; // 4 (terminal 0x80) or 8 (terminal 0x40)
703    let mut val: u64 = 0;
704    for _ in 0..data_bytes {
705        val = (val << 8) | u64::from(read_u8(r)?);
706    }
707    Ok(val)
708}
709
710fn read_u8<R: Read>(r: &mut R) -> Result<u8, DarError> {
711    let mut b = [0u8; 1];
712    r.read_exact(&mut b)?;
713    Ok(b[0])
714}
715
/// Longest NUL-terminated path/name field accepted, in bytes (64 KiB).  Real
/// DAR entries stay well under this; the cap stops a NUL-free region of a
/// hostile archive from growing the buffer until EOF (or OOM on a multi-GiB
/// stream).
const MAX_NUL_STRING: usize = 64 << 10;
720
721/// Read a NUL-terminated UTF-8 string, consuming the NUL byte.
722fn read_nul_string<R: Read>(r: &mut R) -> Result<String, DarError> {
723    let mut bytes = Vec::new();
724    loop {
725        let b = read_u8(r)?;
726        if b == 0 {
727            break;
728        }
729        if bytes.len() >= MAX_NUL_STRING {
730            return Err(DarError::Corrupt(format!(
731                "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
732            )));
733        }
734        bytes.push(b);
735    }
736    String::from_utf8(bytes).map_err(|e| DarError::Corrupt(e.to_string()))
737}
738
739/// Skip a NUL-terminated string without collecting the bytes.
740fn skip_nul_string<R: Read>(r: &mut R) -> Result<(), DarError> {
741    let mut len: usize = 0;
742    loop {
743        if read_u8(r)? == 0 {
744            return Ok(());
745        }
746        len += 1;
747        if len > MAX_NUL_STRING {
748            return Err(DarError::Corrupt(format!(
749                "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
750            )));
751        }
752    }
753}
754
755/// Seek past `n` bytes.
756fn skip<R: Seek>(r: &mut R, n: u64) -> Result<(), DarError> {
757    if n > 0 {
758        // `SeekFrom::Current` takes an i64; a value above i64::MAX would cast to
759        // a negative offset and seek *backwards* (re-reading earlier bytes on a
760        // File).  No real DAR field is that large — reject it outright.
761        let off = i64::try_from(n)
762            .map_err(|_| DarError::Corrupt(format!("skip length {n} exceeds seekable range")))?;
763        r.seek(SeekFrom::Current(off)).map_err(DarError::Io)?;
764    }
765    Ok(())
766}
767
768/// Skip one DAR timestamp field.
769///
770/// Timestamps are prefixed with a type byte:
771/// - `'s'` (0x73) and others: seconds only — one infinint follows
772/// - `'n'` (0x6e): nanosecond precision — two infinints follow (seconds + nanoseconds)
773fn skip_timestamp<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<(), DarError> {
774    // Format 8 and earlier store a bare seconds infinint with NO precision byte
775    // (libdar datetime.cpp:372). Format 9+ prefix a unit byte ('s' seconds,
776    // 'u' microsecond, 'n' nanosecond); sub-second units add a second infinint.
777    if format_major < 9 {
778        read_infinint(r)?;
779        return Ok(());
780    }
781    let ts_type = read_u8(r)?;
782    read_infinint(r)?;
783    if ts_type == b'n' || ts_type == b'u' {
784        read_infinint(r)?;
785    }
786    Ok(())
787}
788
789/// Read the inode flags byte and seek past the remaining inode fields.
790///
791/// Base layout: flags(1) + uid(inf) + gid(inf) + perms(2) + 3 timestamps
792///   Each timestamp: see [`skip_timestamp`] (version-dependent).
793///   FSA inode fields (format 9+ only): two infinints when (flags >> 4) & 1 == 1.
794fn read_inode_base<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<u8, DarError> {
795    let flags = read_u8(r)?;
796    // uid/gid: 2-byte u16 for format <= 7 (libdar cat_inode.cpp:171), infinint for 8+.
797    if format_major <= 7 {
798        skip(r, 4)?; // uid (u16) + gid (u16)
799    } else {
800        read_infinint(r)?; // uid
801        read_infinint(r)?; // gid
802    }
803    skip(r, 2)?; // perms (always a 2-byte big-endian u16, never an infinint)
804    skip_timestamp(r, format_major)?; // atime
805    skip_timestamp(r, format_major)?; // mtime
806                                      // ctime (last_cha) exists only from format 8 (libdar cat_inode.cpp:197).
807    if format_major >= 8 {
808        skip_timestamp(r, format_major)?;
809    }
810    // FSA inode fields exist only from format 9 (libdar cat_inode.cpp:264); bit
811    // 0x10 is the FSA-full status. Formats <= 8 have no FSA.
812    if format_major >= 9 && (flags >> 4) & 1 != 0 {
813        read_infinint(r)?;
814        read_infinint(r)?;
815    }
816    Ok(flags)
817}
818
819/// Skip one FSA (filesystem attributes) block.
820///
821/// Format: infinint(family_tag) + infinint(data_size) + data_size bytes.
822fn skip_fsa<R: Read + Seek>(r: &mut R) -> Result<(), DarError> {
823    let _tag = read_infinint(r)?;
824    let size = read_infinint(r)?;
825    skip(r, size)
826}
827
828// ── Unit tests ────────────────────────────────────────────────────────────────
829
/// Unit tests for the low-level readers/skippers, the catalogue locator, the
/// terminateur trailer and bounded decompression.  All inputs are small
/// in-memory `Cursor`s; every byte count asserted below is derived in the
/// comment next to it.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    // ── read_infinint ─────────────────────────────────────────────────────────

    #[test]
    fn infinint_decodes_value() {
        let data = [0x80u8, 0x00, 0x00, 0x00, 0x0d];
        assert_eq!(read_infinint(&mut Cursor::new(&data[..])).unwrap(), 13);
    }

    #[test]
    fn infinint_bad_preamble_returns_corrupt() {
        // 0x03 = two bits set — not a valid infinint terminal.
        let data = [0x03u8, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(_)));
    }

    #[test]
    fn infinint_truncated_returns_io() {
        // Terminal 0x80 announces 4 data bytes but only 1 follows — the reader
        // hits EOF while pulling the data bytes.
        let err = read_infinint(&mut Cursor::new(&[0x80u8, 0x00][..])).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    #[test]
    fn infinint_0x40_preamble_reads_8_data_bytes() {
        // 0x40 terminal: leading_zeros=1, pos=1, data_bytes=(1+1)*4=8
        // Encodes the value 0x5d15_9331 in 8 big-endian bytes.
        let mut data = vec![0x40u8];
        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x00, 0x5d, 0x15, 0x93, 0x31]);
        assert_eq!(
            read_infinint(&mut Cursor::new(data)).unwrap(),
            0x5d15_9331u64
        );
    }

    #[test]
    fn infinint_multi_bit_terminal_returns_corrupt() {
        // 0x60 = 0110_0000 — two bits set, not a valid terminal.
        let data = [0x60u8, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(_)));
    }

    // ── read_u8 ───────────────────────────────────────────────────────────────

    #[test]
    fn read_u8_reads_single_byte() {
        assert_eq!(read_u8(&mut Cursor::new(&[0x42u8][..])).unwrap(), 0x42);
    }

    #[test]
    fn read_u8_eof_returns_io() {
        let err = read_u8(&mut Cursor::new(&[][..])).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ── read_nul_string ───────────────────────────────────────────────────────

    #[test]
    fn nul_string_reads_until_nul() {
        let data = b"hello\x00world";
        assert_eq!(
            read_nul_string(&mut Cursor::new(&data[..])).unwrap(),
            "hello"
        );
    }

    #[test]
    fn nul_string_invalid_utf8_returns_corrupt() {
        // 0xFF 0x80 is not valid UTF-8; 0x00 terminates.
        let data = [0xFF, 0x80, 0x00];
        let err = read_nul_string(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)));
    }

    #[test]
    fn nul_string_eof_before_nul_returns_io() {
        let err = read_nul_string(&mut Cursor::new(b"no-nul".to_vec())).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ── skip_nul_string ───────────────────────────────────────────────────────

    #[test]
    fn skip_nul_string_advances_past_nul() {
        let data = b"skip\x00rest";
        let mut c = Cursor::new(data.to_vec());
        skip_nul_string(&mut c).unwrap();
        assert_eq!(c.position(), 5); // "skip\0" = 5 bytes consumed
    }

    #[test]
    fn skip_nul_string_eof_returns_io() {
        let err = skip_nul_string(&mut Cursor::new(b"no-nul".to_vec())).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ── find_catalogue ────────────────────────────────────────────────────────

    #[test]
    fn find_catalogue_body_too_short() {
        // Fewer than 6 bytes — can't fill the initial window; label also too short.
        let label = [0u8; 10];
        let err = find_catalogue(&mut Cursor::new(&[0x01u8, 0x02, 0x03][..]), &label).unwrap_err();
        assert!(
            matches!(&err, DarError::Corrupt(s) if s == "archive body too short"
            || s == "seqt_catalogue not found")
        );
    }

    #[test]
    fn find_catalogue_escape_at_start() {
        let mut data = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43, 0xFF];
        let mut c = Cursor::new(&mut data[..]);
        let via_escape = find_catalogue(&mut c, &[0u8; 10]).unwrap();
        assert!(via_escape);
        assert_eq!(c.position(), 6);
    }

    #[test]
    fn find_catalogue_escape_not_found() {
        // 10 bytes of zeros, label is 0xFF×10 so label scan also fails.
        let label = [0xFFu8; 10];
        let err = find_catalogue(&mut Cursor::new(&[0u8; 10][..]), &label).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
    }

    #[test]
    fn find_catalogue_label_fallback() {
        let label: [u8; 10] = [0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6, 0x07, 0x18, 0x29, 0x3A];
        // Prefix junk (no escape) followed by the label bytes.
        let mut data = vec![0x00u8; 5];
        data.extend_from_slice(&label);
        let mut c = Cursor::new(data);
        let via_escape = find_catalogue(&mut c, &label).unwrap();
        assert!(!via_escape);
        assert_eq!(c.position(), 15); // 5 junk + 10 label consumed
    }

    // ── skip ──────────────────────────────────────────────────────────────────

    #[test]
    fn skip_zero_does_not_move_cursor() {
        let mut c = Cursor::new(vec![0xFFu8; 10]);
        skip(&mut c, 0).unwrap();
        assert_eq!(c.position(), 0);
    }

    #[test]
    fn skip_n_advances_cursor() {
        let mut c = Cursor::new(vec![0xFFu8; 10]);
        skip(&mut c, 7).unwrap();
        assert_eq!(c.position(), 7);
    }

    // ── skip_timestamp ────────────────────────────────────────────────────────

    #[test]
    fn skip_timestamp_format8_has_no_unit_byte() {
        // Format 8: bare seconds infinint (5 bytes), nothing else.
        let mut c = Cursor::new(vec![0x80u8, 0x00, 0x00, 0x00, 0x2a, 0xFF]);
        skip_timestamp(&mut c, 8).unwrap();
        assert_eq!(c.position(), 5);
    }

    #[test]
    fn skip_timestamp_format9_seconds_unit_reads_one_infinint() {
        // unit(1) + secs(5) = 6 bytes
        let mut data = vec![b's'];
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x2a]);
        data.push(0xFF); // sentinel — must not be consumed
        let mut c = Cursor::new(data);
        skip_timestamp(&mut c, 9).unwrap();
        assert_eq!(c.position(), 6);
    }

    #[test]
    fn skip_timestamp_format9_subsecond_units_read_two_infinints() {
        // unit(1) + secs(5) + sub-second(5) = 11 bytes for both 'u' and 'n'
        for &unit in &[b'u', b'n'] {
            let mut data = vec![unit];
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x2a]); // seconds
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x07]); // sub-second
            data.push(0xFF); // sentinel
            let mut c = Cursor::new(data);
            skip_timestamp(&mut c, 11).unwrap();
            assert_eq!(c.position(), 11);
        }
    }

    // ── read_inode_base ───────────────────────────────────────────────────────

    #[test]
    fn inode_base_bit4_clear_reads_31_bytes() {
        // flags(1) + uid(5) + gid(5) + perms(2) + 3×[type(1)+secs(5)] = 31 bytes
        let mut data = vec![0x00u8]; // flags (bit4=0)
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // uid
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // gid
        data.extend_from_slice(&[0x00, 0x00]); // perms
        for _ in 0..3 {
            data.push(b's'); // timestamp type
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // seconds
        }
        data.push(0xFF); // sentinel — must not be consumed
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 11).unwrap(), 0x00);
        assert_eq!(c.position(), 31);
    }

    #[test]
    fn inode_base_bit4_set_reads_41_bytes() {
        // flags(1) + uid(5) + gid(5) + perms(2) + 3×[type(1)+secs(5)] + nlink(5) + field9(5) = 41
        let mut data = vec![0x10u8]; // flags (bit4=1)
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // uid
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // gid
        data.extend_from_slice(&[0x00, 0x00]); // perms
        for _ in 0..3 {
            data.push(b's');
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        }
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // nlink
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // field9
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 11).unwrap(), 0x10);
        assert_eq!(c.position(), 41);
    }

    #[test]
    fn inode_base_format7_reads_17_bytes_and_ignores_fsa_bit() {
        // flags(1) + uid(2) + gid(2) + perms(2) + 2×bare-secs(5) = 17 bytes;
        // no ctime and no FSA fields before format 8/9 even with bit4 set.
        let mut data = vec![0x10u8]; // flags (bit4=1, must be ignored)
        data.extend_from_slice(&[0x00, 0x00]); // uid (u16)
        data.extend_from_slice(&[0x00, 0x00]); // gid (u16)
        data.extend_from_slice(&[0x00, 0x00]); // perms
        for _ in 0..2 {
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // atime / mtime
        }
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 7).unwrap(), 0x10);
        assert_eq!(c.position(), 17);
    }

    #[test]
    fn inode_base_format8_reads_28_bytes_and_ignores_fsa_bit() {
        // flags(1) + uid(5) + gid(5) + perms(2) + 3×bare-secs(5) = 28 bytes;
        // ctime is present from format 8, FSA fields only from format 9.
        let mut data = vec![0x10u8]; // flags (bit4=1, must be ignored)
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // uid
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // gid
        data.extend_from_slice(&[0x00, 0x00]); // perms
        for _ in 0..3 {
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // atime / mtime / ctime
        }
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 8).unwrap(), 0x10);
        assert_eq!(c.position(), 28);
    }

    // ── skip_fsa ─────────────────────────────────────────────────────────────

    #[test]
    fn skip_fsa_consumes_tag_size_and_data() {
        // tag=infinint(5) + size=infinint(3) + 3 data bytes
        let mut data = Vec::new();
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x05]); // tag
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x03]); // size=3
        data.extend_from_slice(&[0xAA, 0xBB, 0xCC]); // data
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        skip_fsa(&mut c).unwrap();
        assert_eq!(c.position(), 13); // 5 + 5 + 3 = 13
    }

    // ── hardening: malicious / corrupted infinint encodings ───────────────────
    //
    // A `u64` holds at most 8 data bytes.  The reader's contract is "decode to
    // u64 or return Corrupt" — it must never silently truncate an over-wide
    // value, overflow while computing the byte count, or loop on a zero run.

    #[test]
    fn infinint_leading_zero_byte_returns_corrupt() {
        // A leading 0x00 skip-byte implies a ≥36-byte group — far beyond u64.
        // Must be rejected as Corrupt, not mislabelled as an I/O shortage.
        let data = [0x00u8, 0x80, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn infinint_12_byte_group_exceeds_u64_returns_corrupt() {
        // 0x20 terminal → pos=2 → 12 data bytes → cannot fit in u64.
        // Must error rather than silently truncate to a wrong value.
        let mut data = vec![0x20u8];
        data.extend_from_slice(&[0x11; 12]);
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn infinint_all_zero_run_returns_corrupt_without_hanging() {
        // A run of zero bytes must terminate promptly with Corrupt, never spin
        // consuming the whole stream (and never overflow-panic the skip count).
        let data = vec![0u8; 4096];
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ── hardening: unbounded NUL-terminated strings ───────────────────────────

    #[test]
    fn nul_string_without_terminator_is_length_bounded() {
        // No NUL in 200 KiB of data: must be rejected once the path cap is hit,
        // not grow the buffer until EOF (or OOM on a multi-GiB stream).
        let data = vec![b'A'; 200_000];
        let err = read_nul_string(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn skip_nul_string_without_terminator_is_length_bounded() {
        let data = vec![b'A'; 200_000];
        let err = skip_nul_string(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn nul_string_cap_boundary_is_exact() {
        // Exactly MAX_NUL_STRING payload bytes + NUL is still accepted …
        let mut at_cap = vec![b'A'; MAX_NUL_STRING];
        at_cap.push(0);
        let s = read_nul_string(&mut Cursor::new(at_cap)).unwrap();
        assert_eq!(s.len(), MAX_NUL_STRING);

        // … one more payload byte is rejected, even though a NUL follows.
        let mut over_cap = vec![b'A'; MAX_NUL_STRING + 1];
        over_cap.push(0);
        let err = read_nul_string(&mut Cursor::new(over_cap)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn skip_nul_string_cap_boundary_is_exact() {
        // Same boundary as read_nul_string: MAX_NUL_STRING bytes + NUL is fine …
        let mut at_cap = vec![b'A'; MAX_NUL_STRING];
        at_cap.push(0);
        let mut c = Cursor::new(at_cap);
        skip_nul_string(&mut c).unwrap();
        assert_eq!(c.position(), MAX_NUL_STRING as u64 + 1);

        // … and MAX_NUL_STRING + 1 bytes is rejected.
        let mut over_cap = vec![b'A'; MAX_NUL_STRING + 1];
        over_cap.push(0);
        let err = skip_nul_string(&mut Cursor::new(over_cap)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ── hardening: skip must never seek backwards ─────────────────────────────

    #[test]
    fn skip_value_above_i64_max_returns_corrupt() {
        // n > i64::MAX casts to a negative i64 → SeekFrom::Current would seek
        // *backwards* on a File (re-reading earlier bytes).  Must be rejected,
        // and the stream position must not move.
        let mut c = Cursor::new(vec![0u8; 64]);
        c.set_position(32);
        let err = skip(&mut c, 0x8000_0000_0000_0000).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
        assert_eq!(c.position(), 32); // unchanged on a rejected skip
    }

    // ── terminateur trailer (pre-8 catalog locator) ───────────────────────────

    #[test]
    fn terminateur_reads_catalogue_offset() {
        // pos infinint 0x18 = 24; terminator 0xc0 → two leading ones → 2*4 = 8
        // bytes back to the infinint.
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xc0];
        assert_eq!(read_terminateur(&mut Cursor::new(data)).unwrap(), 24);
    }

    #[test]
    fn terminateur_all_ff_underflows_returns_corrupt() {
        let err = read_terminateur(&mut Cursor::new(vec![0xFFu8; 4])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_excessive_ff_padding_returns_corrupt() {
        let err = read_terminateur(&mut Cursor::new(vec![0xFFu8; 600])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_low_terminator_byte_returns_corrupt() {
        // Terminator byte 0x01 has no top bit set.
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0x01];
        let err = read_terminateur(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_noncontiguous_high_bits_returns_corrupt() {
        // 0xA0 = 1010_0000: top bit set but the high-bit run is not contiguous.
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0xA0];
        let err = read_terminateur(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ── find_catalogue: full-scan fallback + body-too-short ────────────────────

    #[test]
    fn find_catalogue_falls_back_to_full_scan() {
        // Escape near the start; a tiny tail window misses it, forcing the
        // archive_origin full-scan fallback.
        let mut data = vec![0x11u8, 0x22]; // junk before the escape
        data.extend_from_slice(&SEQT_CATALOGUE);
        data.extend_from_slice(&[0x33u8; 12]); // trailing bytes beyond the tail window
        let mut c = Cursor::new(data);
        let via_escape = find_catalogue_within(&mut c, &[0u8; 10], 4).unwrap();
        assert!(via_escape);
        assert_eq!(c.position(), 2 + SEQT_CATALOGUE.len() as u64);
    }

    #[test]
    fn find_catalogue_full_scan_miss_returns_not_found() {
        // No escape and no matching label anywhere; a tiny tail window forces
        // the full-scan fallback, which also misses → "not found".
        let mut c = Cursor::new(vec![0x11u8; 16]);
        let err = find_catalogue_within(&mut c, &[0xABu8; 10], 4).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
    }

    #[test]
    fn find_catalogue_body_too_short_when_origin_at_eof() {
        let mut c = Cursor::new(vec![0u8; 6]);
        c.seek(SeekFrom::Start(6)).unwrap();
        let err = find_catalogue(&mut c, &[0u8; 10]).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "archive body too short"));
    }

    // ── decompress ─────────────────────────────────────────────────────────────

    #[test]
    fn decompress_rejects_decompression_bomb() {
        use flate2::{write::ZlibEncoder, Compression};
        use std::io::Write;
        let mut enc = ZlibEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&[0u8; 4096]).unwrap();
        let blob = enc.finish().unwrap();
        // Inflates to 4096 bytes but the caller caps output at 16.
        let err = decompress(&blob, b'z', 16).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("exceeds bound")));
    }

    #[test]
    fn decompress_rejects_malformed_zlib() {
        let err = decompress(b"not a zlib stream at all", b'z', 1024).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("zlib decode failed")));
    }

    #[test]
    fn decompress_rejects_malformed_xz() {
        let err = decompress(b"this is not an xz stream", b'x', 1024).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("xz decode failed")));
    }

    #[test]
    fn bounded_writer_caps_output_and_flushes() {
        let mut w = BoundedWriter {
            buf: Vec::new(),
            max: 4,
        };
        assert_eq!(w.write(b"ab").unwrap(), 2); // within bound
        w.flush().unwrap();
        let err = w.write(b"cde").unwrap_err(); // 2 + 3 > 4
        assert_eq!(err.to_string(), "decompressed data exceeds bound");
        assert_eq!(w.buf, b"ab");
    }
}