Skip to main content

dar_forensic/
lib.rs

1//! Pure-Rust reader for Denis Corbin DAR (Disk ARchiver) archives.
2//!
3//! Supports DAR formats 7–11 (produced by dar 2.3–2.8) and the legacy ≤7 grammar.
4//! Passware Kit Mobile produces format-9 archives; dar 2.8.5 produces 11.3.
5//! Entries and the catalogue compressed with gzip, bzip2 or xz are transparently
6//! decompressed (pure-Rust); lzo, zstd, lz4 and encryption are not decoded.
7//!
8//! ## Format sketch
9//!
10//! ```text
11//! Slice header:
12//!   [4]  magic = 00 00 00 7b  (SAUV_MAGIC_NUMBER = 123, big-endian u32)
13//!   [10] internal_name label
14//!   [1]  flag  [1]  ext_char
15//!   TLV list:  infinint(count) + count × (u16 type + infinint len + data)
16//!   ← archive_origin: all catalog archive_offset values are relative to here
17//!
18//! Archive body:
19//!   escaped sequences (seqt_file, seqt_saved, …) + raw file bytes
20//!
21//! Catalog  (located by seqt_catalogue escape: AD FD EA 77 21 43):
22//!   [10] label  +  (NUL working-dir path, format 11.1+ only)  +  entries
23//!
24//!   Each entry: cat_sig byte where (cat_sig & 0x1f | 0x60) gives type
25//!     'd' directory  → NUL-name + inode [+ FSA]  (push to dir stack)
26//!     'f' file       → NUL-name + inode [+ FSA] + file-specific fields
27//!     'z' EOD        → pop dir stack; depth=0 → done
28//! ```
29//!
30//! ## Key non-obvious invariants
31//!
32//! - **Infinint**: variable-length. The common form is 5 bytes
33//!   (`0x80 XX XX XX XX`, a big-endian u32); timestamps past 2^32 use the
34//!   9-byte `0x40` form (big-endian u64). Encodings wider than 64 bits are
35//!   rejected as corrupt — this reader decodes to `u64` or errors, never
36//!   truncates.
37//! - **Permissions**: 2-byte big-endian u16, *not* an infinint.
38//! - **Timestamps**: format 8 stores a bare seconds infinint; format 9+ prefix
39//!   a unit byte (`'s'`/`'u'`/`'n'`) and add a sub-second infinint for `'u'`/`'n'`.
40//! - **FSA** (format 9+ only): inode flag bit `0x10` (FSA-full) adds inode
41//!   infinints and an FSA block; format 8 has no FSA.
42//! - **archive_offset**: points *directly* to the raw file bytes, not to the
43//!   data-section header that precedes them in the body stream.
44//!   `seek(archive_origin + archive_offset)` then `read(stored_size)`.
45//!
46//! Full format notes: `docs/implementation-notes.md`.
47
48use std::io::{Cursor, Read, Seek, SeekFrom, Write};
49
50use thiserror::Error;
51
/// Slice-header magic: `SAUV_MAGIC_NUMBER` (123) serialised as a big-endian
/// `u32`, i.e. the four bytes `00 00 00 7b`.
const DAR_MAGIC: [u8; 4] = 123u32.to_be_bytes();
54
/// Cap (512 MiB) on the compressed catalogue bytes read from the archive tail.
/// Together with [`MAX_CATALOGUE_INFLATED`] it guards catalogue loading against
/// a decompression bomb; per-file streams need no such constant because they
/// are bounded by the entry's known size.
const MAX_CATALOGUE_COMPRESSED: u64 = 512 << 20;
/// Cap (1 GiB) on the size of the catalogue once inflated.
const MAX_CATALOGUE_INFLATED: u64 = 1 << 30;
60
/// The `seqt_catalogue` escape sequence (`AD FD EA 77 21 43`) that marks the
/// start of the catalog in archives written with tape marks.
const SEQT_CATALOGUE: [u8; 6] = *b"\xAD\xFD\xEA\x77\x21\x43";
63
/// `archive_version(11,1).value()`, i.e. `11*256 + 1`: the first archive format
/// whose catalog header carries an in-place (working-directory) path.
/// Formats 8, 9, 10 and 11.0 have no such field.
const FORMAT_11_1: u32 = (11 << 8) | 1;
68
69/// Errors returned by [`DarReader`].
70#[derive(Debug, Error)]
71pub enum DarError {
72    #[error("I/O error: {0}")]
73    Io(#[from] std::io::Error),
74    #[error("not a DAR archive")]
75    NotADar,
76    #[error("corrupt archive: {0}")]
77    Corrupt(String),
78    #[error("entry not found: '{0}'")]
79    EntryNotFound(String),
80}
81
/// Public description of one archived file, as returned by
/// `DarReader::entries`.
#[derive(Clone, Debug)]
pub struct DarEntry {
    /// Path of the file inside the archive, components joined with `/`.
    pub path: String,
    /// Uncompressed size in bytes, as declared by the catalog.
    pub size: u64,
}
88
/// Internal catalog record for one file: the public metadata plus everything
/// needed to locate and decode its bytes.
#[derive(Clone, Debug)]
struct EntryRef {
    /// Path inside the archive, components joined with `/`.
    path: String,
    /// Uncompressed size declared by the catalog.
    size: u64,
    /// Offset of the raw bytes, relative to the reader's `archive_origin`.
    archive_offset: u64,
    /// Number of bytes the entry occupies in the archive.
    stored_size: u64,
    /// libdar compression char governing this entry's data.
    compression: u8,
    /// True when the catalog's encryption flag byte was non-zero.
    encrypted: bool,
}
98
99/// Read-only DAR archive reader.
100pub struct DarReader<R: Read + Seek> {
101    inner: R,
102    /// Byte position immediately after the slice header TLV block.
103    /// `archive_origin + archive_offset` = absolute position of raw file bytes.
104    archive_origin: u64,
105    /// Archive format major version (`value() >> 8`). Format 1 stores no
106    /// per-entry `storage_size`, so a compressed format-1 entry is decoded by
107    /// streaming the codec to its natural end rather than reading a fixed length.
108    format_major: u32,
109    entries: Vec<EntryRef>,
110}
111
112impl<R: Read + Seek> DarReader<R> {
113    /// Open a DAR archive, validating the magic and loading the catalog.
114    pub fn open(mut reader: R) -> Result<Self, DarError> {
115        let mut magic = [0u8; 4];
116        reader
117            .read_exact(&mut magic)
118            .map_err(|_| DarError::NotADar)?;
119        if magic != DAR_MAGIC {
120            return Err(DarError::NotADar);
121        }
122
123        let mut label = [0u8; 10];
124        reader.read_exact(&mut label)?; // internal_name label
125        let _flag = read_u8(&mut reader)?; // slice flag ('T' terminal / 'N' / 'E')
126        let extension = read_u8(&mut reader)?; // 'T' = TLV (format 8+); 'N'/'S' = legacy (<= 7)
127
128        // Format 8+ carries a TLV list and a `seqt_catalogue` escape; format <= 7
129        // has neither — its catalogue is located via the end `terminateur` trailer
130        // (libdar header.cpp extension handling; terminateur.cpp).
131        let entries;
132        let archive_origin;
133        let format_major;
134        if extension == b'T' {
135            // TLV list: infinint(count) then count × (u16 type + infinint len + data)
136            let tlv_count = read_infinint(&mut reader).map_err(|e| match e {
137                DarError::Io(_) => DarError::Corrupt("truncated TLV block".into()),
138                other => other,
139            })?;
140            for _ in 0..tlv_count {
141                skip(&mut reader, 2)?;
142                let len = read_infinint(&mut reader)?;
143                skip(&mut reader, len)?;
144            }
145
146            archive_origin = reader.stream_position()?;
147            let format_value = read_format_value(&mut reader);
148            // The archive's global compression algorithm is the byte immediately
149            // after the version string; it tells us whether (and how) the
150            // catalogue stream is compressed. Unreadable → treat as stored.
151            let global_comp = read_u8(&mut reader).unwrap_or(b'n');
152            reader.seek(SeekFrom::Start(archive_origin))?;
153
154            // true → seqt_catalogue tape mark found (catalog has label + maybe path);
155            // false → located by its ref_data_name label (tape marks off, e.g. Passware).
156            let via_escape = find_catalogue(&mut reader, &label)?;
157            format_major = format_value >> 8;
158            if via_escape && is_compressed(global_comp) {
159                // The catalogue is a single stream compressed with the archive
160                // codec, beginning right after the seqt_catalogue escape and
161                // running to the trailer. Inflate it, then parse from the
162                // plaintext buffer — which begins with the in-catalog label and
163                // optional in-place path, exactly like the uncompressed case.
164                let mut compressed = Vec::new();
165                reader
166                    .by_ref()
167                    .take(MAX_CATALOGUE_COMPRESSED)
168                    .read_to_end(&mut compressed)?;
169                let inflated = decompress(&compressed, global_comp, MAX_CATALOGUE_INFLATED)?;
170                let mut cur = Cursor::new(inflated);
171                skip(&mut cur, 10)?; // catalog label
172                if format_value >= FORMAT_11_1 {
173                    skip_nul_string(&mut cur)?;
174                }
175                entries = parse_catalog(&mut cur, format_major, global_comp)?;
176            } else {
177                if via_escape {
178                    skip(&mut reader, 10)?; // catalog label
179                                            // The in-place path exists only from format 11.1
180                                            // (catalogue.cpp:157). Formats 8/9/10/11.0 have none.
181                    if format_value >= FORMAT_11_1 {
182                        skip_nul_string(&mut reader)?;
183                    }
184                }
185                entries = parse_catalog(&mut reader, format_major, global_comp)?;
186            }
187        } else if extension == b'N' || extension == b'S' {
188            if extension == b'S' {
189                read_infinint(&mut reader)?; // slice size (multi-slice header); unused
190            }
191            archive_origin = reader.stream_position()?;
192            let format_value = read_format_value(&mut reader); // 3-byte edition: value = major*256
193            format_major = format_value >> 8;
194            // The global compression char follows the version string (same as
195            // format 8+). Formats <= 7 carry no per-entry compression byte, so
196            // this single char governs both the catalogue and every entry's data.
197            let global_comp = read_u8(&mut reader).unwrap_or(b'n');
198            let cat_offset = read_terminateur(&mut reader)?;
199            let cat_start = archive_origin
200                .checked_add(cat_offset)
201                .ok_or_else(|| DarError::Corrupt("catalogue offset overflows".into()))?;
202            let end = reader.seek(SeekFrom::End(0))?;
203            if cat_start >= end {
204                return Err(DarError::Corrupt(format!(
205                    "catalogue start {cat_start} past archive end {end}"
206                )));
207            }
208            reader.seek(SeekFrom::Start(cat_start))?;
209            // Legacy catalogue: no 10-byte label, no path — entries begin here.
210            // When the archive is compressed, the catalogue is a single codec
211            // stream (the terminateur addresses its start); inflate it first.
212            if is_compressed(global_comp) {
213                let mut compressed = Vec::new();
214                reader
215                    .by_ref()
216                    .take(MAX_CATALOGUE_COMPRESSED)
217                    .read_to_end(&mut compressed)?;
218                let inflated = decompress(&compressed, global_comp, MAX_CATALOGUE_INFLATED)?;
219                entries = parse_catalog(&mut Cursor::new(inflated), format_major, global_comp)?;
220            } else {
221                entries = parse_catalog(&mut reader, format_major, global_comp)?;
222            }
223        } else {
224            return Err(DarError::Corrupt(format!(
225                "unknown slice-header extension {extension:#04x}"
226            )));
227        }
228
229        Ok(Self {
230            inner: reader,
231            archive_origin,
232            format_major,
233            entries,
234        })
235    }
236
237    /// List all archived file entries (path and uncompressed size).
238    pub fn entries(&self) -> Vec<DarEntry> {
239        self.entries
240            .iter()
241            .map(|e| DarEntry {
242                path: e.path.clone(),
243                size: e.size,
244            })
245            .collect()
246    }
247
248    /// Extract a file by path, returning its raw bytes.
249    pub fn extract(&mut self, path: &str) -> Result<Vec<u8>, DarError> {
250        let entry = self
251            .entries
252            .iter()
253            .find(|e| e.path == path)
254            .ok_or_else(|| DarError::EntryNotFound(path.to_string()))?
255            .clone();
256
257        if entry.encrypted {
258            return Err(DarError::Corrupt(format!("'{path}' is encrypted")));
259        }
260
261        // The raw bytes live at archive_origin + archive_offset.  Both fields
262        // are attacker-controlled, so the sum must be checked, and the claimed
263        // size validated against the bytes that actually exist before any
264        // allocation — otherwise a forged stored_size is an allocation bomb.
265        let start = self
266            .archive_origin
267            .checked_add(entry.archive_offset)
268            .ok_or_else(|| {
269                DarError::Corrupt(format!("'{path}' archive offset overflows file position"))
270            })?;
271        let end = self.inner.seek(SeekFrom::End(0))?;
272        if start > end {
273            return Err(DarError::Corrupt(format!(
274                "'{path}' starts at {start}, past archive end {end}"
275            )));
276        }
277
278        // Format 1 stores no per-entry storage_size, so a compressed entry is a
279        // codec stream of unknown on-disk length (dar 1.x is gzip/zlib-only).
280        // Decode it straight from the archive — the decoder stops at the stream's
281        // natural end — bounded by the catalog `size`, rather than reading a
282        // fixed, synthesised length.
283        if self.format_major == 1 && is_compressed(entry.compression) {
284            self.inner.seek(SeekFrom::Start(start))?;
285            let out = read_bounded(
286                flate2::read::ZlibDecoder::new(&mut self.inner),
287                entry.size,
288                "zlib",
289            )?;
290            if out.len() as u64 != entry.size {
291                return Err(DarError::Corrupt(format!(
292                    "'{path}' decompressed to {} bytes but catalog declares {}",
293                    out.len(),
294                    entry.size
295                )));
296            }
297            return Ok(out);
298        }
299
300        let available = end - start;
301        if entry.stored_size > available {
302            return Err(DarError::Corrupt(format!(
303                "'{path}' claims {} stored bytes but only {available} remain",
304                entry.stored_size
305            )));
306        }
307
308        self.inner.seek(SeekFrom::Start(start))?;
309        let mut data = vec![0u8; entry.stored_size as usize];
310        self.inner.read_exact(&mut data)?;
311
312        if !is_compressed(entry.compression) {
313            return Ok(data);
314        }
315        // Each compressed entry is an independent stream; its uncompressed length
316        // is the catalog `size`, so decode exactly that and reject any mismatch —
317        // a forged stream cannot over-inflate past the declared size.
318        let out = decompress(&data, entry.compression, entry.size)?;
319        if out.len() as u64 != entry.size {
320            return Err(DarError::Corrupt(format!(
321                "'{path}' decompressed to {} bytes but catalog declares {}",
322                out.len(),
323                entry.size
324            )));
325        }
326        Ok(out)
327    }
328}
329
330// ── Catalog parser ────────────────────────────────────────────────────────────
331
/// Size of the tail window (256 MiB) scanned first when locating the catalog.
/// On archives larger than this the scan starts this many bytes before EOF
/// (the catalog always lives at the tail), so a multi-gigabyte forensic
/// archive is only read in full if the tail scan finds nothing.
const TAIL_SCAN: u64 = 256 << 20;

/// Number of fresh bytes read per scan iteration (4 MiB).
const CHUNK: usize = 4 << 20;
/// Bytes carried over from one chunk to the next so a marker straddling a
/// chunk boundary is still seen: the longest marker (the 10-byte label, versus
/// the 6-byte escape) minus one.
const OVERLAP: usize = 10 - 1;
340
341/// Scan forward from the current reader position searching for either the
342/// `seqt_catalogue` escape or the archive `label`.
343///
344/// Returns `Some(true)` if the escape was found (reader positioned just after it),
345/// `Some(false)` if the label was found (reader positioned just after it),
346/// `None` if EOF was reached without a match.
347fn scan_window<R: Read + Seek>(
348    r: &mut R,
349    label: &[u8; 10],
350    use_label: bool,
351) -> Result<Option<bool>, DarError> {
352    let mut buf = vec![0u8; CHUNK + OVERLAP];
353    let mut overlap_len: usize = 0;
354    loop {
355        let chunk_file_pos = r.stream_position()?;
356        let n = r.read(&mut buf[overlap_len..overlap_len + CHUNK])?;
357        if n == 0 {
358            break;
359        }
360        let total = overlap_len + n;
361        // buf[0..overlap_len]  → tail of previous chunk (file pos: chunk_file_pos - overlap_len)
362        // buf[overlap_len..total] → newly read bytes
363        let buf_base = chunk_file_pos - overlap_len as u64;
364
365        if let Some(i) = buf[..total]
366            .windows(SEQT_CATALOGUE.len())
367            .position(|w| w == SEQT_CATALOGUE)
368        {
369            r.seek(SeekFrom::Start(
370                buf_base + i as u64 + SEQT_CATALOGUE.len() as u64,
371            ))?;
372            return Ok(Some(true));
373        }
374        if use_label {
375            if let Some(i) = buf[..total]
376                .windows(label.len())
377                .position(|w| w == label.as_ref())
378            {
379                r.seek(SeekFrom::Start(buf_base + i as u64 + label.len() as u64))?;
380                return Ok(Some(false));
381            }
382        }
383
384        let keep = OVERLAP.min(total);
385        buf.copy_within(total - keep..total, 0);
386        overlap_len = keep;
387    }
388    Ok(None)
389}
390
391/// Locate the catalog section and position the reader at its first entry.
392///
393/// Returns `true` when the `seqt_catalogue` escape is found — the caller then
394/// skips the 10-byte in-catalog label and (format 11.1+) the path NUL string.
395/// The escape is a *sequential-read tape mark*; it is present only when the
396/// archive was written with tape marks (libdar's default).
397///
398/// Returns `false` when the catalog is located by its `ref_data_name` label
399/// directly. Archives written with tape marks disabled (e.g. by Passware Kit
400/// Mobile, equivalent to `dar -at`) omit the escape; their catalog still begins
401/// with the 10-byte `ref_data_name`, which equals the slice `label`, so scanning
402/// for `label` in the tail finds it — a structural marker, not a heuristic.
403///
404/// Returns `Err(Corrupt)` when neither marker is found.
405///
406/// Strategy: DAR catalogs always live at the tail of the archive.  On forensic
407/// archives ≥ 256 MiB we jump straight to the last 256 MiB and scan forward
408/// from there, then fall back to a full forward scan from `archive_origin` if
409/// needed.  This reduces the I/O for a 92 GiB archive from ~99 GiB to ~107 MiB.
410fn find_catalogue<R: Read + Seek>(r: &mut R, label: &[u8; 10]) -> Result<bool, DarError> {
411    find_catalogue_within(r, label, TAIL_SCAN)
412}
413
414/// Implementation of [`find_catalogue`] with the tail-scan window size as a
415/// parameter so the full-scan fallback can be exercised without a 256 MiB
416/// fixture.
417fn find_catalogue_within<R: Read + Seek>(
418    r: &mut R,
419    label: &[u8; 10],
420    tail_scan: u64,
421) -> Result<bool, DarError> {
422    // All-zero labels cannot be used as a reliable catalog marker (too common
423    // in zero-padded archive bodies).
424    let use_label = !label.iter().all(|&b| b == 0);
425
426    let archive_origin = r.stream_position()?;
427    let file_end = r.seek(SeekFrom::End(0))?;
428
429    if file_end <= archive_origin {
430        return Err(DarError::Corrupt("archive body too short".into()));
431    }
432
433    // Jump to at most tail_scan bytes before end; for small files this equals archive_origin.
434    let tail_start = archive_origin.max(file_end.saturating_sub(tail_scan));
435    r.seek(SeekFrom::Start(tail_start))?;
436
437    if let Some(result) = scan_window(r, label, use_label)? {
438        return Ok(result);
439    }
440
441    // Tail scan missed.  Fall back to a full scan from archive_origin.
442    if tail_start > archive_origin {
443        r.seek(SeekFrom::Start(archive_origin))?;
444        if let Some(result) = scan_window(r, label, use_label)? {
445            return Ok(result);
446        }
447    }
448
449    Err(DarError::Corrupt("seqt_catalogue not found".into()))
450}
451
452/// Read the NUL-terminated `version_string` at the current position and return
453/// `archive_version::value()` = `major*256 + fix`, where `major = b0*256 + b1`
454/// and each byte is `value + 48`. Format <= 7 stores only `"NN"` (fix implicitly
455/// 0); format 8+ stores `"NNf"`. Returns `u32::MAX` for an unreadable string so
456/// an unknown future format is treated as newest.
457fn read_format_value<R: Read>(r: &mut R) -> u32 {
458    let s = read_nul_string(r).unwrap_or_default();
459    let b = s.as_bytes();
460    if b.len() >= 2 {
461        let major = u32::from(b[0].saturating_sub(48)) * 256 + u32::from(b[1].saturating_sub(48));
462        let fix = if b.len() >= 3 {
463            u32::from(b[2].saturating_sub(48))
464        } else {
465            0
466        };
467        major * 256 + fix
468    } else {
469        u32::MAX
470    }
471}
472
/// Report whether a libdar compression char names a known compression
/// algorithm.
///
/// `compression2char` emits the algorithm letter in lowercase for streamed
/// mode and uppercase for per-block mode, so the check is case-insensitive:
/// `z`=gzip, `y`=bzip2, `x`=xz, `l`/`j`/`k`=lzo variants, `d`=zstd, `q`=lz4.
/// `n` means stored. Every other byte — e.g. a header placeholder in an
/// archive not produced by dar — counts as not compressed, so the data is read
/// verbatim rather than mis-decoded.
fn is_compressed(algo: u8) -> bool {
    const CODEC_LETTERS: &[u8] = b"zyxljkdq";
    CODEC_LETTERS.contains(&algo.to_ascii_lowercase())
}
485
486/// Inflate one compressed stream, dispatching on the libdar codec char and
487/// rejecting output longer than `max_out` (decompression-bomb guard). Trailing
488/// bytes after the stream (e.g. the archive trailer) are ignored by the decoder.
489fn decompress(data: &[u8], algo: u8, max_out: u64) -> Result<Vec<u8>, DarError> {
490    match algo.to_ascii_lowercase() {
491        // dar's "gzip" is a raw zlib stream (78 xx), not a gzip (1f 8b) wrapper.
492        b'z' => read_bounded(flate2::read::ZlibDecoder::new(data), max_out, "zlib"),
493        b'y' => read_bounded(bzip2_rs::DecoderReader::new(data), max_out, "bzip2"),
494        b'x' => {
495            // lzma-rs is writer-driven and has no output cap, so a BoundedWriter
496            // enforces the same decompression-bomb guard the Read codecs get.
497            let mut input: &[u8] = data;
498            let mut out = BoundedWriter {
499                buf: Vec::new(),
500                max: max_out,
501            };
502            match lzma_rs::xz_decompress(&mut input, &mut out) {
503                Ok(()) => Ok(out.buf),
504                // The DAR trailer follows the catalogue's xz stream. lzma-rs
505                // fully decodes and validates the stream (blocks, index, CRC,
506                // footer magic) before rejecting trailing bytes, so on this one
507                // error the output is already complete and sound. Per-file
508                // extract passes exactly stored_size bytes and never trails.
509                // (String coupling is why lzma-rs is pinned to 0.3.x.)
510                Err(lzma_rs::error::Error::XzError(ref m))
511                    if m == "Unexpected data after last XZ block" =>
512                {
513                    Ok(out.buf)
514                }
515                Err(e) => Err(DarError::Corrupt(format!("xz decode failed: {e}"))),
516            }
517        }
518        other => Err(DarError::Corrupt(format!(
519            "unsupported compression '{}'",
520            other as char
521        ))),
522    }
523}
524
/// In-memory `Write` sink with a hard size limit.
///
/// It accumulates everything written into `buf` until a write would push the
/// total past `max` bytes; that write is rejected with an error and nothing
/// from it is stored. Used to cap the output of a writer-driven decoder
/// (lzma-rs) against a decompression bomb.
struct BoundedWriter {
    /// Bytes accepted so far.
    buf: Vec<u8>,
    /// Maximum total number of bytes this sink will accept.
    max: u64,
}

impl Write for BoundedWriter {
    /// Append `data` in full, or fail without storing any of it when the
    /// result would exceed `max`.
    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
        let projected = self.buf.len() as u64 + data.len() as u64;
        if projected <= self.max {
            self.buf.extend_from_slice(data);
            Ok(data.len())
        } else {
            Err(std::io::Error::other("decompressed data exceeds bound"))
        }
    }

    /// Nothing is buffered outside `buf`, so flushing always succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
545
546/// Read a decoder to EOF, capping output at `max_out` bytes (one extra byte is
547/// requested so an over-long stream is detected, not silently truncated).
548fn read_bounded<R: Read>(decoder: R, max_out: u64, what: &str) -> Result<Vec<u8>, DarError> {
549    let mut out = Vec::new();
550    decoder
551        .take(max_out.saturating_add(1))
552        .read_to_end(&mut out)
553        .map_err(|e| DarError::Corrupt(format!("{what} decode failed: {e}")))?;
554    if out.len() as u64 > max_out {
555        return Err(DarError::Corrupt("decompressed data exceeds bound".into()));
556    }
557    Ok(out)
558}
559
560/// Locate the catalogue in a pre-format-8 archive via the end `terminateur`
561/// trailer (libdar terminateur.cpp:95-138), returning the catalogue start offset
562/// relative to `archive_origin`.
563///
564/// From EOF, count trailing `0xFF` padding bytes (8 bits each); the first
565/// non-`0xFF` byte encodes the remaining count in unary as its set high bits.
566/// `byte_offset = total_bits * 4` is the distance back from that byte to the
567/// catalogue-position infinint. The `0xFF` run is bounded so a hostile all-`0xFF`
568/// tail cannot spin or overflow.
569fn read_terminateur<R: Read + Seek>(r: &mut R) -> Result<u64, DarError> {
570    const BLOCK_SIZE: u64 = 4;
571    const MAX_BITS: u64 = 4096; // far beyond any real terminator
572
573    let mut pos = r.seek(SeekFrom::End(0))?;
574    let mut bits: u64 = 0;
575    let terminal = loop {
576        if pos == 0 {
577            return Err(DarError::Corrupt("terminator underflows archive".into()));
578        }
579        pos -= 1;
580        r.seek(SeekFrom::Start(pos))?;
581        let b = read_u8(r)?;
582        if b == 0xFF {
583            bits += 8;
584            if bits > MAX_BITS {
585                return Err(DarError::Corrupt("terminator padding too long".into()));
586            }
587        } else {
588            break b;
589        }
590    };
591    // The terminator byte must have its top bit set; count consecutive set MSBs.
592    if terminal & 0x80 == 0 {
593        return Err(DarError::Corrupt(format!(
594            "invalid terminator byte {terminal:#04x}"
595        )));
596    }
597    let mut x = terminal;
598    while x != 0 {
599        if x & 0x80 == 0 {
600            return Err(DarError::Corrupt("malformed terminator bit run".into()));
601        }
602        bits += 1;
603        x <<= 1;
604    }
605    let byte_offset = bits * BLOCK_SIZE;
606    let infinint_start = pos
607        .checked_sub(byte_offset)
608        .ok_or_else(|| DarError::Corrupt("terminator offset underflows".into()))?;
609    r.seek(SeekFrom::Start(infinint_start))?;
610    read_infinint(r)
611}
612
613/// Parse all catalog entries, returning file entries with their extraction info.
614///
615/// Stops when the root directory is closed (depth reaches zero) or an unknown
616/// entry type is encountered (slice trailer).
617fn parse_catalog<R: Read + Seek>(
618    r: &mut R,
619    format_major: u32,
620    global_comp: u8,
621) -> Result<Vec<EntryRef>, DarError> {
622    let mut entries = Vec::new();
623    let mut dir_stack: Vec<String> = Vec::new();
624    let mut depth: u32 = 0;
625
626    loop {
627        let mut buf = [0u8; 1];
628        match r.read_exact(&mut buf) {
629            Ok(()) => {}
630            Err(_) => break,
631        }
632
633        // Lower 5 bits of cat_sig + 0x60 gives the ASCII type letter.
634        let entry_type = ((buf[0] & 0x1f) | 0x60) as char;
635
636        match entry_type {
637            'z' => {
638                // End of directory
639                depth = depth.saturating_sub(1);
640                dir_stack.pop();
641                if depth == 0 {
642                    break;
643                }
644            }
645            'd' => {
646                let name = read_nul_string(r)?;
647                let flags = read_inode_base(r, format_major)?;
648                if format_major >= 9 && (flags >> 4) & 1 != 0 {
649                    skip_fsa(r)?;
650                }
651                depth += 1;
652                // <ROOT> is a virtual root; don't include it in file paths.
653                if name != "<ROOT>" {
654                    dir_stack.push(name);
655                }
656            }
657            'f' => {
658                let name = read_nul_string(r)?;
659                let flags = read_inode_base(r, format_major)?;
660                if format_major >= 9 && (flags >> 4) & 1 != 0 {
661                    skip_fsa(r)?;
662                }
663
664                let size = read_infinint(r)?;
665                let archive_offset = read_infinint(r)?;
666                // Per-entry layout differs by format (libdar cat_file.cpp / crc.cpp):
667                // - 8+: storage_size · enc(1) · comp(1) · length-prefixed CRC.
668                // - 2-7: storage_size · fixed 2-byte CRC; NO enc/comp byte — the
669                //   archive-global codec applies to every entry.
670                // - 1: size · offset only — no storage_size, CRC, or codec byte;
671                //   storage_size is synthesised and the global codec applies.
672                let (mut stored_size, encryption_flag, compression) = if format_major >= 8 {
673                    let ss = read_infinint(r)?;
674                    let enc = read_u8(r)?;
675                    let comp = read_u8(r)?;
676                    let crc_size = read_infinint(r)?;
677                    skip(r, crc_size)?;
678                    (ss, enc, comp)
679                } else if format_major >= 2 {
680                    let ss = read_infinint(r)?;
681                    skip(r, 2)?; // fixed 2-byte CRC
682                    (ss, 0u8, global_comp)
683                } else {
684                    (size, 0u8, global_comp) // format 1: storage_size synthesised
685                };
686                // Pre-8: storage_size 0 means the data is stored uncompressed.
687                if format_major <= 7 && stored_size == 0 {
688                    stored_size = size;
689                }
690
691                let path = if dir_stack.is_empty() {
692                    name
693                } else {
694                    format!("{}/{}", dir_stack.join("/"), name)
695                };
696
697                entries.push(EntryRef {
698                    path,
699                    size,
700                    archive_offset,
701                    stored_size,
702                    compression,
703                    encrypted: encryption_flag != 0,
704                });
705            }
706            'l' => {
707                // Symbolic link: inode + NUL-terminated target path; not extractable.
708                let _name = read_nul_string(r)?;
709                let flags = read_inode_base(r, format_major)?;
710                if format_major >= 9 && (flags >> 4) & 1 != 0 {
711                    skip_fsa(r)?;
712                }
713                skip_nul_string(r)?; // symlink target
714            }
715            'p' | 's' => {
716                // Named pipe (FIFO) / unix socket: a bare inode, no data and no
717                // type-specific fields. Skip it so catalog parsing continues past
718                // it to later files (real full-filesystem archives contain these).
719                let _name = read_nul_string(r)?;
720                let flags = read_inode_base(r, format_major)?;
721                if format_major >= 9 && (flags >> 4) & 1 != 0 {
722                    skip_fsa(r)?;
723                }
724            }
725            _ => break, // unknown type = slice trailer or unhandled entry
726        }
727    }
728
729    Ok(entries)
730}
731
732// ── Low-level I/O helpers ─────────────────────────────────────────────────────
733
734/// Read a DAR variable-length infinint, decoded to `u64`.
735///
736/// Format (TG=4): optional leading `0x00` skip-bytes, then a terminal byte
737/// with exactly one bit set; `pos = terminal.leading_zeros()` and the value
738/// occupies `(skip_count * 8 + pos + 1) * 4` big-endian bytes.
739///
740/// A `u64` holds at most 8 data bytes.  Any encoding wider than that — i.e.
741/// *any* leading `0x00` (which alone implies ≥ 36 bytes) or a terminal below
742/// `0x40` (`pos > 1`) — cannot be represented and is rejected as `Corrupt`
743/// rather than silently truncated.  This single bound also removes the
744/// `(skip * 8 …)` arithmetic-overflow panic and caps the leading-zero scan, so
745/// a malicious all-zero run can never spin or overflow the skip counter.
746fn read_infinint<R: Read>(r: &mut R) -> Result<u64, DarError> {
747    let terminal = read_u8(r)?;
748    if terminal == 0x00 {
749        // A skip-byte group is at least 36 data bytes — far beyond u64.
750        return Err(DarError::Corrupt(
751            "infinint exceeds 64-bit range (multi-group encoding)".into(),
752        ));
753    }
754    if terminal.count_ones() != 1 {
755        return Err(DarError::Corrupt(format!(
756            "invalid infinint terminal: {terminal:#04x}"
757        )));
758    }
759    let pos = terminal.leading_zeros(); // 0 ..= 7
760    if pos > 1 {
761        // data_bytes = (pos + 1) * 4 > 8 → does not fit in u64.
762        return Err(DarError::Corrupt(format!(
763            "infinint exceeds 64-bit range: terminal {terminal:#04x} implies {} bytes",
764            (pos + 1) * 4
765        )));
766    }
767    let data_bytes = (pos + 1) * 4; // 4 (terminal 0x80) or 8 (terminal 0x40)
768    let mut val: u64 = 0;
769    for _ in 0..data_bytes {
770        val = (val << 8) | u64::from(read_u8(r)?);
771    }
772    Ok(val)
773}
774
775fn read_u8<R: Read>(r: &mut R) -> Result<u8, DarError> {
776    let mut b = [0u8; 1];
777    r.read_exact(&mut b)?;
778    Ok(b[0])
779}
780
/// Upper bound, in bytes, on a NUL-terminated path/name field (64 KiB).
///
/// Real DAR entries stay well under this; the cap stops a NUL-free region of
/// a hostile archive from growing the buffer until EOF (or OOM on a multi-GiB
/// stream).
const MAX_NUL_STRING: usize = 64 * 1024;
785
786/// Read a NUL-terminated UTF-8 string, consuming the NUL byte.
787fn read_nul_string<R: Read>(r: &mut R) -> Result<String, DarError> {
788    let mut bytes = Vec::new();
789    loop {
790        let b = read_u8(r)?;
791        if b == 0 {
792            break;
793        }
794        if bytes.len() >= MAX_NUL_STRING {
795            return Err(DarError::Corrupt(format!(
796                "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
797            )));
798        }
799        bytes.push(b);
800    }
801    String::from_utf8(bytes).map_err(|e| DarError::Corrupt(e.to_string()))
802}
803
804/// Skip a NUL-terminated string without collecting the bytes.
805fn skip_nul_string<R: Read>(r: &mut R) -> Result<(), DarError> {
806    let mut len: usize = 0;
807    loop {
808        if read_u8(r)? == 0 {
809            return Ok(());
810        }
811        len += 1;
812        if len > MAX_NUL_STRING {
813            return Err(DarError::Corrupt(format!(
814                "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
815            )));
816        }
817    }
818}
819
820/// Seek past `n` bytes.
821fn skip<R: Seek>(r: &mut R, n: u64) -> Result<(), DarError> {
822    if n > 0 {
823        // `SeekFrom::Current` takes an i64; a value above i64::MAX would cast to
824        // a negative offset and seek *backwards* (re-reading earlier bytes on a
825        // File).  No real DAR field is that large — reject it outright.
826        let off = i64::try_from(n)
827            .map_err(|_| DarError::Corrupt(format!("skip length {n} exceeds seekable range")))?;
828        r.seek(SeekFrom::Current(off)).map_err(DarError::Io)?;
829    }
830    Ok(())
831}
832
833/// Skip one DAR timestamp field.
834///
835/// Timestamps are prefixed with a type byte:
836/// - `'s'` (0x73) and others: seconds only — one infinint follows
837/// - `'n'` (0x6e): nanosecond precision — two infinints follow (seconds + nanoseconds)
838fn skip_timestamp<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<(), DarError> {
839    // Format 8 and earlier store a bare seconds infinint with NO precision byte
840    // (libdar datetime.cpp:372). Format 9+ prefix a unit byte ('s' seconds,
841    // 'u' microsecond, 'n' nanosecond); sub-second units add a second infinint.
842    if format_major < 9 {
843        read_infinint(r)?;
844        return Ok(());
845    }
846    let ts_type = read_u8(r)?;
847    read_infinint(r)?;
848    if ts_type == b'n' || ts_type == b'u' {
849        read_infinint(r)?;
850    }
851    Ok(())
852}
853
854/// Read the inode flags byte and seek past the remaining inode fields.
855///
856/// Base layout: flags(1) + uid(inf) + gid(inf) + perms(2) + 3 timestamps
857///   Each timestamp: see [`skip_timestamp`] (version-dependent).
858///   FSA inode fields (format 9+ only): two infinints when (flags >> 4) & 1 == 1.
859fn read_inode_base<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<u8, DarError> {
860    // Format 1 predates extended attributes and has NO leading flag byte
861    // (libdar cat_inode.cpp); formats 2+ store it. Synthesise 0 for format 1.
862    let flags = if format_major >= 2 { read_u8(r)? } else { 0 };
863    // uid/gid: 2-byte u16 for format <= 7 (libdar cat_inode.cpp:171), infinint for 8+.
864    if format_major <= 7 {
865        skip(r, 4)?; // uid (u16) + gid (u16)
866    } else {
867        read_infinint(r)?; // uid
868        read_infinint(r)?; // gid
869    }
870    skip(r, 2)?; // perms (always a 2-byte big-endian u16, never an infinint)
871    skip_timestamp(r, format_major)?; // atime
872    skip_timestamp(r, format_major)?; // mtime
873                                      // ctime (last_cha) exists only from format 8 (libdar cat_inode.cpp:197).
874    if format_major >= 8 {
875        skip_timestamp(r, format_major)?;
876    }
877    // FSA inode fields exist only from format 9 (libdar cat_inode.cpp:264); bit
878    // 0x10 is the FSA-full status. Formats <= 8 have no FSA.
879    if format_major >= 9 && (flags >> 4) & 1 != 0 {
880        read_infinint(r)?;
881        read_infinint(r)?;
882    }
883    Ok(flags)
884}
885
886/// Skip one FSA (filesystem attributes) block.
887///
888/// Format: infinint(family_tag) + infinint(data_size) + data_size bytes.
889fn skip_fsa<R: Read + Seek>(r: &mut R) -> Result<(), DarError> {
890    let _tag = read_infinint(r)?;
891    let size = read_infinint(r)?;
892    skip(r, size)
893}
894
895// ── Unit tests ────────────────────────────────────────────────────────────────
896
#[cfg(test)]
mod tests {
    // Unit tests for the low-level readers and the catalogue locator.  Every
    // test feeds a hand-built byte sequence through an in-memory `Cursor` and
    // checks either the decoded value, the resulting stream position, or the
    // `DarError` variant returned.
    use super::*;
    use std::io::Cursor;

    /// Encode `v` in the common 5-byte infinint form (`0x80` + big-endian u32).
    fn infinint32(v: u32) -> [u8; 5] {
        let [a, b, c, d] = v.to_be_bytes();
        [0x80, a, b, c, d]
    }

    // ── read_infinint ─────────────────────────────────────────────────────────

    #[test]
    fn infinint_decodes_value() {
        let data = [0x80u8, 0x00, 0x00, 0x00, 0x0d];
        assert_eq!(read_infinint(&mut Cursor::new(&data[..])).unwrap(), 13);
    }

    #[test]
    fn infinint_bad_preamble_returns_corrupt() {
        // 0x03 = two bits set — not a valid infinint terminal.
        let data = [0x03u8, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(_)));
    }

    #[test]
    fn infinint_truncated_returns_io() {
        // Only 2 bytes — read_exact needs 5.
        let err = read_infinint(&mut Cursor::new(&[0x80u8, 0x00][..])).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    #[test]
    fn infinint_0x40_preamble_reads_8_data_bytes() {
        // 0x40 terminal: leading_zeros=1, pos=1, data_bytes=(0*8+1+1)*4=8
        // Encodes the value 0x5d15_9331 in 8 big-endian bytes.
        let mut data = vec![0x40u8];
        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x00, 0x5d, 0x15, 0x93, 0x31]);
        assert_eq!(
            read_infinint(&mut Cursor::new(data)).unwrap(),
            0x5d15_9331u64
        );
    }

    #[test]
    fn infinint_0x40_preamble_decodes_full_u64_range() {
        // All eight data bytes are significant: the widest accepted form must
        // round-trip u64::MAX without losing the high half.
        let mut data = vec![0x40u8];
        data.extend_from_slice(&[0xFF; 8]);
        data.push(0x00); // sentinel — must not be consumed
        let mut c = Cursor::new(data);
        assert_eq!(read_infinint(&mut c).unwrap(), u64::MAX);
        assert_eq!(c.position(), 9);
    }

    #[test]
    fn infinint_multi_bit_terminal_returns_corrupt() {
        // 0x60 = 0110_0000 — two bits set, not a valid terminal.
        let data = [0x60u8, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(_)));
    }

    // ── read_u8 ───────────────────────────────────────────────────────────────

    #[test]
    fn read_u8_reads_single_byte() {
        assert_eq!(read_u8(&mut Cursor::new(&[0x42u8][..])).unwrap(), 0x42);
    }

    #[test]
    fn read_u8_eof_returns_io() {
        let err = read_u8(&mut Cursor::new(&[][..])).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ── read_nul_string ───────────────────────────────────────────────────────

    #[test]
    fn nul_string_reads_until_nul() {
        let data = b"hello\x00world";
        assert_eq!(
            read_nul_string(&mut Cursor::new(&data[..])).unwrap(),
            "hello"
        );
    }

    #[test]
    fn nul_string_invalid_utf8_returns_corrupt() {
        // 0xFF 0x80 is not valid UTF-8; 0x00 terminates.
        let data = [0xFF, 0x80, 0x00];
        let err = read_nul_string(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)));
    }

    #[test]
    fn nul_string_eof_before_nul_returns_io() {
        let err = read_nul_string(&mut Cursor::new(b"no-nul".to_vec())).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ── skip_nul_string ───────────────────────────────────────────────────────

    #[test]
    fn skip_nul_string_advances_past_nul() {
        let data = b"skip\x00rest";
        let mut c = Cursor::new(data.to_vec());
        skip_nul_string(&mut c).unwrap();
        assert_eq!(c.position(), 5); // "skip\0" = 5 bytes consumed
    }

    #[test]
    fn skip_nul_string_eof_returns_io() {
        let err = skip_nul_string(&mut Cursor::new(b"no-nul".to_vec())).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ── find_catalogue ────────────────────────────────────────────────────────

    #[test]
    fn find_catalogue_body_too_short() {
        // Fewer than 6 bytes — can't fill the initial window; label also too short.
        let label = [0u8; 10];
        let err = find_catalogue(&mut Cursor::new(&[0x01u8, 0x02, 0x03][..]), &label).unwrap_err();
        assert!(
            matches!(&err, DarError::Corrupt(s) if s == "archive body too short"
            || s == "seqt_catalogue not found")
        );
    }

    #[test]
    fn find_catalogue_escape_at_start() {
        let mut data = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43, 0xFF];
        let mut c = Cursor::new(&mut data[..]);
        let via_escape = find_catalogue(&mut c, &[0u8; 10]).unwrap();
        assert!(via_escape);
        assert_eq!(c.position(), 6);
    }

    #[test]
    fn find_catalogue_escape_not_found() {
        // 10 bytes of zeros, label is 0xFF×10 so label scan also fails.
        let label = [0xFFu8; 10];
        let err = find_catalogue(&mut Cursor::new(&[0u8; 10][..]), &label).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
    }

    #[test]
    fn find_catalogue_label_fallback() {
        let label: [u8; 10] = [0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6, 0x07, 0x18, 0x29, 0x3A];
        // Prefix junk (no escape) followed by the label bytes.
        let mut data = vec![0x00u8; 5];
        data.extend_from_slice(&label);
        let mut c = Cursor::new(data);
        let via_escape = find_catalogue(&mut c, &label).unwrap();
        assert!(!via_escape);
        assert_eq!(c.position(), 15); // 5 junk + 10 label consumed
    }

    // ── skip ──────────────────────────────────────────────────────────────────

    #[test]
    fn skip_zero_does_not_move_cursor() {
        let mut c = Cursor::new(vec![0xFFu8; 10]);
        skip(&mut c, 0).unwrap();
        assert_eq!(c.position(), 0);
    }

    #[test]
    fn skip_n_advances_cursor() {
        let mut c = Cursor::new(vec![0xFFu8; 10]);
        skip(&mut c, 7).unwrap();
        assert_eq!(c.position(), 7);
    }

    // ── skip_timestamp ────────────────────────────────────────────────────────

    #[test]
    fn skip_timestamp_pre_format9_reads_bare_seconds() {
        // Format 8 has no unit byte: exactly one 5-byte infinint is consumed,
        // even when the following byte happens to look like a unit.
        let mut data = infinint32(1_600_000_000).to_vec();
        data.push(b'n'); // sentinel — must not be consumed
        let mut c = Cursor::new(data);
        skip_timestamp(&mut c, 8).unwrap();
        assert_eq!(c.position(), 5);
    }

    #[test]
    fn skip_timestamp_seconds_unit_reads_one_infinint() {
        // Format 9+: unit(1) + seconds(5) = 6 bytes.
        let mut data = vec![b's'];
        data.extend_from_slice(&infinint32(1_600_000_000));
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        skip_timestamp(&mut c, 9).unwrap();
        assert_eq!(c.position(), 6);
    }

    #[test]
    fn skip_timestamp_subsecond_units_read_two_infinints() {
        // Format 9+: unit(1) + seconds(5) + sub-second(5) = 11 bytes for
        // both the microsecond and the nanosecond unit.
        for &unit in &[b'u', b'n'] {
            let mut data = vec![unit];
            data.extend_from_slice(&infinint32(1_600_000_000));
            data.extend_from_slice(&infinint32(123_456));
            data.push(0xFF); // sentinel
            let mut c = Cursor::new(data);
            skip_timestamp(&mut c, 11).unwrap();
            assert_eq!(c.position(), 11, "unit {:?}", unit as char);
        }
    }

    // ── read_inode_base ───────────────────────────────────────────────────────

    #[test]
    fn inode_base_bit4_clear_reads_31_bytes() {
        // flags(1) + uid(5) + gid(5) + perms(2) + 3×[type(1)+secs(5)] = 31 bytes
        let mut data = vec![0x00u8]; // flags (bit4=0)
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // uid
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // gid
        data.extend_from_slice(&[0x00, 0x00]); // perms
        for _ in 0..3 {
            data.push(b's'); // timestamp type
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // seconds
        }
        data.push(0xFF); // sentinel — must not be consumed
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 11).unwrap(), 0x00);
        assert_eq!(c.position(), 31);
    }

    #[test]
    fn inode_base_bit4_set_reads_41_bytes() {
        // flags(1) + uid(5) + gid(5) + perms(2) + 3×[type(1)+secs(5)] + nlink(5) + field9(5) = 41
        let mut data = vec![0x10u8]; // flags (bit4=1)
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // uid
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // gid
        data.extend_from_slice(&[0x00, 0x00]); // perms
        for _ in 0..3 {
            data.push(b's');
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        }
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // nlink
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]); // field9
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 11).unwrap(), 0x10);
        assert_eq!(c.position(), 41);
    }

    #[test]
    fn inode_base_format8_ignores_fsa_bit_and_has_bare_timestamps() {
        // flags(1) + uid(5) + gid(5) + perms(2) + 3×secs(5) = 28 bytes.
        // Bit 0x10 is set, but FSA fields only exist from format 9, so
        // nothing beyond the third timestamp may be consumed.
        let mut data = vec![0x10u8];
        data.extend_from_slice(&infinint32(1000)); // uid
        data.extend_from_slice(&infinint32(1000)); // gid
        data.extend_from_slice(&[0x01, 0xED]); // perms
        for _ in 0..3 {
            data.extend_from_slice(&infinint32(1_600_000_000)); // bare seconds
        }
        data.extend_from_slice(&infinint32(0)); // would-be FSA field — must stay unread
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 8).unwrap(), 0x10);
        assert_eq!(c.position(), 28);
    }

    #[test]
    fn inode_base_format7_uses_u16_ids_and_two_timestamps() {
        // flags(1) + uid(2) + gid(2) + perms(2) + 2×secs(5) = 17 bytes.
        let mut data = vec![0x00u8]; // flags
        data.extend_from_slice(&[0x03, 0xE8, 0x03, 0xE8]); // uid + gid as u16
        data.extend_from_slice(&[0x01, 0xED]); // perms
        for _ in 0..2 {
            data.extend_from_slice(&infinint32(1_000_000_000)); // atime, mtime
        }
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 7).unwrap(), 0x00);
        assert_eq!(c.position(), 17);
    }

    #[test]
    fn inode_base_format1_has_no_flag_byte() {
        // uid(2) + gid(2) + perms(2) + 2×secs(5) = 16 bytes; the first byte
        // belongs to uid and must not be returned as flags.
        let mut data = vec![0xAAu8, 0xBB, 0xCC, 0xDD]; // uid + gid as u16
        data.extend_from_slice(&[0x01, 0xED]); // perms
        for _ in 0..2 {
            data.extend_from_slice(&infinint32(900_000_000)); // atime, mtime
        }
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 1).unwrap(), 0x00);
        assert_eq!(c.position(), 16);
    }

    // ── skip_fsa ─────────────────────────────────────────────────────────────

    #[test]
    fn skip_fsa_consumes_tag_size_and_data() {
        // tag=infinint(5) + size=infinint(3) + 3 data bytes
        let mut data = Vec::new();
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x05]); // tag
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x03]); // size=3
        data.extend_from_slice(&[0xAA, 0xBB, 0xCC]); // data
        data.push(0xFF); // sentinel
        let mut c = Cursor::new(data);
        skip_fsa(&mut c).unwrap();
        assert_eq!(c.position(), 13); // 5 + 5 + 3 = 13
    }

    // ── hardening: malicious / corrupted infinint encodings ───────────────────
    //
    // A `u64` holds at most 8 data bytes.  The reader's contract is "decode to
    // u64 or return Corrupt" — it must never silently truncate an over-wide
    // value, overflow while computing the byte count, or loop on a zero run.

    #[test]
    fn infinint_leading_zero_byte_returns_corrupt() {
        // A leading 0x00 skip-byte implies a ≥36-byte group — far beyond u64.
        // Must be rejected as Corrupt, not mislabelled as an I/O shortage.
        let data = [0x00u8, 0x80, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn infinint_12_byte_group_exceeds_u64_returns_corrupt() {
        // 0x20 terminal → pos=2 → 12 data bytes → cannot fit in u64.
        // Must error rather than silently truncate to a wrong value.
        let mut data = vec![0x20u8];
        data.extend_from_slice(&[0x11; 12]);
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn infinint_lowest_terminal_bit_returns_corrupt() {
        // 0x01 terminal → pos=7 → 32 data bytes: the widest single-group
        // form.  Rejected as Corrupt even though all 32 bytes are present.
        let mut data = vec![0x01u8];
        data.extend_from_slice(&[0x00; 32]);
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn infinint_all_zero_run_returns_corrupt_without_hanging() {
        // A run of zero bytes must terminate promptly with Corrupt, never spin
        // consuming the whole stream (and never overflow-panic the skip count).
        let data = vec![0u8; 4096];
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ── hardening: unbounded NUL-terminated strings ───────────────────────────

    #[test]
    fn nul_string_without_terminator_is_length_bounded() {
        // No NUL in 200 KiB of data: must be rejected once the path cap is hit,
        // not grow the buffer until EOF (or OOM on a multi-GiB stream).
        let data = vec![b'A'; 200_000];
        let err = read_nul_string(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn skip_nul_string_without_terminator_is_length_bounded() {
        let data = vec![b'A'; 200_000];
        let err = skip_nul_string(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn nul_string_cap_boundary_is_exact() {
        // Exactly MAX_NUL_STRING bytes before the NUL is the longest string
        // both readers accept ...
        let mut at_cap = vec![b'A'; MAX_NUL_STRING];
        at_cap.push(0);
        let text = read_nul_string(&mut Cursor::new(at_cap.clone())).unwrap();
        assert_eq!(text.len(), MAX_NUL_STRING);
        skip_nul_string(&mut Cursor::new(at_cap)).unwrap();

        // ... and a single extra byte trips the cap in both, even though a
        // terminating NUL follows.
        let mut over_cap = vec![b'A'; MAX_NUL_STRING + 1];
        over_cap.push(0);
        let err = read_nul_string(&mut Cursor::new(over_cap.clone())).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
        let err = skip_nul_string(&mut Cursor::new(over_cap)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ── hardening: skip must never seek backwards ─────────────────────────────

    #[test]
    fn skip_value_above_i64_max_returns_corrupt() {
        // n > i64::MAX casts to a negative i64 → SeekFrom::Current would seek
        // *backwards* on a File (re-reading earlier bytes).  Must be rejected,
        // and the stream position must not move.
        let mut c = Cursor::new(vec![0u8; 64]);
        c.set_position(32);
        let err = skip(&mut c, 0x8000_0000_0000_0000).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
        assert_eq!(c.position(), 32); // unchanged on a rejected skip
    }

    // ── terminateur trailer (pre-8 catalog locator) ───────────────────────────

    #[test]
    fn terminateur_reads_catalogue_offset() {
        // pos infinint 0x18 = 24; terminator 0xc0 → two leading ones → 2*4 = 8
        // bytes back to the infinint.
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xc0];
        assert_eq!(read_terminateur(&mut Cursor::new(data)).unwrap(), 24);
    }

    #[test]
    fn terminateur_all_ff_underflows_returns_corrupt() {
        let err = read_terminateur(&mut Cursor::new(vec![0xFFu8; 4])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_excessive_ff_padding_returns_corrupt() {
        let err = read_terminateur(&mut Cursor::new(vec![0xFFu8; 600])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_low_terminator_byte_returns_corrupt() {
        // Terminator byte 0x01 has no top bit set.
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0x01];
        let err = read_terminateur(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_noncontiguous_high_bits_returns_corrupt() {
        // 0xA0 = 1010_0000: top bit set but the high-bit run is not contiguous.
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0xA0];
        let err = read_terminateur(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ── find_catalogue: full-scan fallback + body-too-short ────────────────────

    #[test]
    fn find_catalogue_falls_back_to_full_scan() {
        // Escape near the start; a tiny tail window misses it, forcing the
        // archive_origin full-scan fallback.
        let mut data = vec![0x11u8, 0x22]; // junk before the escape
        data.extend_from_slice(&SEQT_CATALOGUE);
        data.extend_from_slice(&[0x33u8; 12]); // trailing bytes beyond the tail window
        let mut c = Cursor::new(data);
        let via_escape = find_catalogue_within(&mut c, &[0u8; 10], 4).unwrap();
        assert!(via_escape);
        assert_eq!(c.position(), 2 + SEQT_CATALOGUE.len() as u64);
    }

    #[test]
    fn find_catalogue_full_scan_miss_returns_not_found() {
        // No escape and no matching label anywhere; a tiny tail window forces
        // the full-scan fallback, which also misses → "not found".
        let mut c = Cursor::new(vec![0x11u8; 16]);
        let err = find_catalogue_within(&mut c, &[0xABu8; 10], 4).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
    }

    #[test]
    fn find_catalogue_body_too_short_when_origin_at_eof() {
        let mut c = Cursor::new(vec![0u8; 6]);
        c.seek(SeekFrom::Start(6)).unwrap();
        let err = find_catalogue(&mut c, &[0u8; 10]).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "archive body too short"));
    }

    // ── decompress ─────────────────────────────────────────────────────────────

    #[test]
    fn decompress_rejects_decompression_bomb() {
        use flate2::{write::ZlibEncoder, Compression};
        use std::io::Write;
        let mut enc = ZlibEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&[0u8; 4096]).unwrap();
        let blob = enc.finish().unwrap();
        // Inflates to 4096 bytes but the caller caps output at 16.
        let err = decompress(&blob, b'z', 16).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("exceeds bound")));
    }

    #[test]
    fn decompress_rejects_malformed_zlib() {
        let err = decompress(b"not a zlib stream at all", b'z', 1024).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("zlib decode failed")));
    }

    #[test]
    fn decompress_rejects_malformed_xz() {
        let err = decompress(b"this is not an xz stream", b'x', 1024).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("xz decode failed")));
    }

    #[test]
    fn bounded_writer_caps_output_and_flushes() {
        let mut w = BoundedWriter {
            buf: Vec::new(),
            max: 4,
        };
        assert_eq!(w.write(b"ab").unwrap(), 2); // within bound
        w.flush().unwrap();
        let err = w.write(b"cde").unwrap_err(); // 2 + 3 > 4
        assert_eq!(err.to_string(), "decompressed data exceeds bound");
        assert_eq!(w.buf, b"ab");
    }
}