Skip to main content

tar_core/
lib.rs

1#![forbid(unsafe_code)]
2#![deny(missing_docs)]
3#![cfg_attr(not(feature = "std"), no_std)]
4//! Sans-IO tar parsing for sync and async runtimes.
5//!
6//! `tar-core` provides zero-copy parsing and building of tar archives that works
7//! with any I/O model. The [`parse::Parser`] has no trait bounds on readers—it
8//! just processes byte slices. This enables code sharing between sync crates
9//! like [tar-rs](https://crates.io/crates/tar) and async crates like
10//! [tokio-tar](https://crates.io/crates/tokio-tar).
11//!
12//! All header structs use the [`zerocopy`] crate for safe, efficient
13//! memory-mapped access without allocations. Supports POSIX.1-1988, UStar
14//! (POSIX.1-2001), and GNU tar formats.
15//!
16//! # Header Formats
17//!
18//! Tar archives have evolved through several formats:
19//!
20//! - **Old (POSIX.1-1988)**: The original Unix tar format with basic fields
21//! - **UStar (POSIX.1-2001)**: Adds `magic`/`version`, user/group names, and path prefix
22//! - **GNU tar**: Extends UStar with sparse file support and long name/link extensions
23//!
24//! # Example
25//!
26//! ```
27//! use tar_core::{Header, EntryType};
28//!
29//! // Parse a header from raw bytes
30//! let data = [0u8; 512]; // Would normally come from a tar file
31//! let header = Header::from_bytes(&data);
32//!
33//! // Access header fields
34//! let entry_type = header.entry_type();
35//! let path = header.path_bytes();
36//! ```
37//!
38//! # Parsing
39//!
40//! For parsing complete tar archives with automatic handling of GNU and PAX
41//! extensions, see the sans-IO [`parse`] module. It also contains security
42//! [`parse::Limits`] and the [`parse::ParseError`] type.
43
44extern crate alloc;
45
46pub mod builder;
47pub mod parse;
48
49pub use builder::{
50    blocks_for_size, EntryBuilder, ExtensionMode, HeaderBuilder, PaxBuilder, LINKNAME_MAX_LEN,
51    NAME_MAX_LEN,
52};
53
54use alloc::format;
55use alloc::string::String;
56use alloc::vec::Vec;
57use core::fmt;
58
59use thiserror::Error;
60use zerocopy::{FromBytes, FromZeros, Immutable, IntoBytes, KnownLayout};
61
/// Length, in bytes, of a single tar header block.
pub const HEADER_SIZE: usize = 512;

/// Magic bytes of a UStar header: `ustar` followed by a NUL byte.
pub const USTAR_MAGIC: &[u8; 6] = b"ustar\0";

/// Version bytes of a UStar header: the two ASCII digits `00`.
pub const USTAR_VERSION: &[u8; 2] = b"00";

/// Magic bytes of a GNU tar header: `ustar` followed by a space.
pub const GNU_MAGIC: &[u8; 6] = b"ustar ";

/// Version bytes of a GNU tar header: a space followed by a NUL byte.
pub const GNU_VERSION: &[u8; 2] = b" \0";
76
77/// Errors that can occur when parsing or building tar headers.
78#[derive(Debug, Error)]
79pub enum HeaderError {
80    /// The provided data is too short to contain a header.
81    #[error("insufficient data: expected {HEADER_SIZE} bytes, got {0}")]
82    InsufficientData(usize),
83
84    /// An octal field contains invalid characters.
85    #[error("invalid octal field: {0:?}")]
86    InvalidOctal(Vec<u8>),
87
88    /// A value is too large or too long for its header field.
89    #[error("value overflows {field_len}-byte field: {detail}")]
90    FieldOverflow {
91        /// Size of the target field in bytes.
92        field_len: usize,
93        /// Human-readable description of the overflow.
94        detail: String,
95    },
96
97    /// The header checksum does not match the computed value.
98    #[error("checksum mismatch: expected {expected}, computed {computed}")]
99    ChecksumMismatch {
100        /// The checksum value stored in the header.
101        expected: u64,
102        /// The checksum computed from the header bytes.
103        computed: u64,
104    },
105}
106
107/// Result type for header parsing operations.
108pub type Result<T> = core::result::Result<T, HeaderError>;
109
110// ============================================================================
111// Header Structs
112// ============================================================================
113
114/// Old-style (POSIX.1-1988) tar header with named fields.
115///
116/// This represents the original Unix tar format. Fields after `linkname`
117/// are undefined in this format and may contain garbage. See module-level
118/// documentation for the field layout table.
119#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)]
120#[repr(C)]
121pub struct OldHeader {
122    /// File path name (null-terminated if shorter than 100 bytes).
123    pub name: [u8; 100],
124    /// File mode in octal ASCII.
125    pub mode: [u8; 8],
126    /// Owner user ID in octal ASCII.
127    pub uid: [u8; 8],
128    /// Owner group ID in octal ASCII.
129    pub gid: [u8; 8],
130    /// File size in octal ASCII.
131    pub size: [u8; 12],
132    /// Modification time as Unix timestamp in octal ASCII.
133    pub mtime: [u8; 12],
134    /// Header checksum in octal ASCII.
135    pub cksum: [u8; 8],
136    /// Entry type flag (called `linkflag` in the original V7 format).
137    pub linkflag: [u8; 1],
138    /// Link target name for hard/symbolic links.
139    pub linkname: [u8; 100],
140    /// Padding to fill the 512-byte block.
141    pub pad: [u8; 255],
142}
143
144impl Default for OldHeader {
145    fn default() -> Self {
146        Self::new_zeroed()
147    }
148}
149
150impl fmt::Debug for OldHeader {
151    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
152        f.debug_struct("OldHeader")
153            .field("name", &String::from_utf8_lossy(truncate_null(&self.name)))
154            .field("mode", &String::from_utf8_lossy(truncate_null(&self.mode)))
155            .field("linkflag", &self.linkflag[0])
156            .finish_non_exhaustive()
157    }
158}
159
160/// UStar (POSIX.1-2001) tar header format.
161///
162/// This format adds a magic number, version, user/group names, device
163/// numbers for special files, and a path prefix for long filenames.
164/// See module-level documentation for the field layout table.
165#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)]
166#[repr(C)]
167pub struct UstarHeader {
168    /// File path name (null-terminated if shorter than 100 bytes).
169    pub name: [u8; 100],
170    /// File mode in octal ASCII.
171    pub mode: [u8; 8],
172    /// Owner user ID in octal ASCII.
173    pub uid: [u8; 8],
174    /// Owner group ID in octal ASCII.
175    pub gid: [u8; 8],
176    /// File size in octal ASCII.
177    pub size: [u8; 12],
178    /// Modification time as Unix timestamp in octal ASCII.
179    pub mtime: [u8; 12],
180    /// Header checksum in octal ASCII.
181    pub cksum: [u8; 8],
182    /// Entry type flag.
183    pub typeflag: [u8; 1],
184    /// Link target name for hard/symbolic links.
185    pub linkname: [u8; 100],
186    /// Magic string identifying the format ("ustar\0" for UStar).
187    pub magic: [u8; 6],
188    /// Format version ("00" for UStar).
189    pub version: [u8; 2],
190    /// Owner user name (null-terminated).
191    pub uname: [u8; 32],
192    /// Owner group name (null-terminated).
193    pub gname: [u8; 32],
194    /// Device major number in octal ASCII (for special files).
195    pub dev_major: [u8; 8],
196    /// Device minor number in octal ASCII (for special files).
197    pub dev_minor: [u8; 8],
198    /// Path prefix for names longer than 100 bytes.
199    pub prefix: [u8; 155],
200    /// Padding to fill the 512-byte block.
201    pub pad: [u8; 12],
202}
203
204impl Default for UstarHeader {
205    fn default() -> Self {
206        let mut header = Self::new_zeroed();
207        header.magic.copy_from_slice(USTAR_MAGIC);
208        header.version.copy_from_slice(USTAR_VERSION);
209        header
210    }
211}
212
213impl fmt::Debug for UstarHeader {
214    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
215        f.debug_struct("UstarHeader")
216            .field("name", &String::from_utf8_lossy(truncate_null(&self.name)))
217            .field("mode", &String::from_utf8_lossy(truncate_null(&self.mode)))
218            .field("typeflag", &self.typeflag[0])
219            .field("magic", &self.magic)
220            .field(
221                "uname",
222                &String::from_utf8_lossy(truncate_null(&self.uname)),
223            )
224            .finish_non_exhaustive()
225    }
226}
227
/// One decoded data region of a sparse file.
///
/// An entry marks a contiguous run of real data inside the logical file;
/// whatever lies between entries is implicitly zero-filled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SparseEntry {
    /// Where the region starts, as a byte offset into the logical file.
    pub offset: u64,
    /// How many bytes of real data the region holds.
    pub length: u64,
}
239
240/// GNU tar sparse file chunk descriptor.
241///
242/// Each descriptor specifies a region of data in a sparse file.
243/// Both offset and numbytes are 12-byte octal ASCII fields.
244#[derive(Clone, Copy, Default, FromBytes, IntoBytes, Immutable, KnownLayout)]
245#[repr(C)]
246pub struct GnuSparseHeader {
247    /// Byte offset of this chunk within the file.
248    pub offset: [u8; 12],
249    /// Number of bytes in this chunk.
250    pub numbytes: [u8; 12],
251}
252
253impl GnuSparseHeader {
254    /// Returns true if this descriptor is empty (offset or numbytes starts
255    /// with a zero byte, indicating an unused slot).
256    #[must_use]
257    pub fn is_empty(&self) -> bool {
258        self.offset[0] == 0 || self.numbytes[0] == 0
259    }
260
261    /// Parse offset and length into a [`SparseEntry`].
262    ///
263    /// Handles both octal ASCII and GNU base-256 encoding.
264    ///
265    /// # Errors
266    ///
267    /// Returns [`HeaderError::InvalidOctal`] if either field is malformed.
268    pub fn to_sparse_entry(&self) -> Result<SparseEntry> {
269        Ok(SparseEntry {
270            offset: parse_numeric(&self.offset)?,
271            length: parse_numeric(&self.numbytes)?,
272        })
273    }
274
275    /// Write a [`SparseEntry`] into this descriptor.
276    ///
277    /// Uses octal ASCII if the values fit, otherwise GNU base-256 encoding.
278    pub fn set(&mut self, entry: &SparseEntry) {
279        encode_numeric(&mut self.offset, entry.offset)
280            .expect("u64 always fits in 12-byte numeric field");
281        encode_numeric(&mut self.numbytes, entry.length)
282            .expect("u64 always fits in 12-byte numeric field");
283    }
284
285    /// Get the offset of this sparse chunk.
286    ///
287    /// Handles both octal ASCII and GNU base-256 encoding.
288    ///
289    /// # Errors
290    ///
291    /// Returns [`HeaderError::InvalidOctal`] if the field is malformed.
292    pub fn offset(&self) -> Result<u64> {
293        parse_numeric(&self.offset)
294    }
295
296    /// Set the offset of this sparse chunk.
297    ///
298    /// Uses octal ASCII if the value fits, otherwise GNU base-256 encoding.
299    pub fn set_offset(&mut self, offset: u64) {
300        encode_numeric(&mut self.offset, offset).expect("u64 always fits in 12-byte numeric field");
301    }
302
303    /// Get the length (numbytes) of this sparse chunk.
304    ///
305    /// Handles both octal ASCII and GNU base-256 encoding.
306    ///
307    /// # Errors
308    ///
309    /// Returns [`HeaderError::InvalidOctal`] if the field is malformed.
310    pub fn length(&self) -> Result<u64> {
311        parse_numeric(&self.numbytes)
312    }
313
314    /// Set the length (numbytes) of this sparse chunk.
315    ///
316    /// Uses octal ASCII if the value fits, otherwise GNU base-256 encoding.
317    pub fn set_length(&mut self, length: u64) {
318        encode_numeric(&mut self.numbytes, length)
319            .expect("u64 always fits in 12-byte numeric field");
320    }
321}
322
323impl fmt::Debug for GnuSparseHeader {
324    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
325        f.debug_struct("GnuSparseHeader")
326            .field("offset", &parse_octal(&self.offset).ok())
327            .field("numbytes", &parse_octal(&self.numbytes).ok())
328            .finish()
329    }
330}
331
332/// GNU tar header format with sparse file support.
333///
334/// This format extends UStar with support for sparse files, access/creation
335/// times, and long name handling. The prefix field is replaced with
336/// additional metadata. See module-level documentation for the field layout table.
337#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)]
338#[repr(C)]
339pub struct GnuHeader {
340    /// File path name (null-terminated if shorter than 100 bytes).
341    pub name: [u8; 100],
342    /// File mode in octal ASCII.
343    pub mode: [u8; 8],
344    /// Owner user ID in octal ASCII.
345    pub uid: [u8; 8],
346    /// Owner group ID in octal ASCII.
347    pub gid: [u8; 8],
348    /// File size in octal ASCII (for sparse files, this is the size on disk).
349    pub size: [u8; 12],
350    /// Modification time as Unix timestamp in octal ASCII.
351    pub mtime: [u8; 12],
352    /// Header checksum in octal ASCII.
353    pub cksum: [u8; 8],
354    /// Entry type flag.
355    pub typeflag: [u8; 1],
356    /// Link target name for hard/symbolic links.
357    pub linkname: [u8; 100],
358    /// Magic string identifying the format ("ustar " for GNU).
359    pub magic: [u8; 6],
360    /// Format version (" \0" for GNU).
361    pub version: [u8; 2],
362    /// Owner user name (null-terminated).
363    pub uname: [u8; 32],
364    /// Owner group name (null-terminated).
365    pub gname: [u8; 32],
366    /// Device major number in octal ASCII (for special files).
367    pub dev_major: [u8; 8],
368    /// Device minor number in octal ASCII (for special files).
369    pub dev_minor: [u8; 8],
370    /// Access time in octal ASCII.
371    pub atime: [u8; 12],
372    /// Creation time in octal ASCII.
373    pub ctime: [u8; 12],
374    /// Offset for multivolume archives.
375    pub offset: [u8; 12],
376    /// Long names support (deprecated).
377    pub longnames: [u8; 4],
378    /// Unused padding byte.
379    pub unused: [u8; 1],
380    /// Sparse file chunk descriptors (4 entries).
381    pub sparse: [GnuSparseHeader; 4],
382    /// Flag indicating more sparse headers follow.
383    pub isextended: [u8; 1],
384    /// Real size of sparse file (uncompressed).
385    pub realsize: [u8; 12],
386    /// Padding to fill the 512-byte block.
387    pub pad: [u8; 17],
388}
389
390impl Default for GnuHeader {
391    fn default() -> Self {
392        let mut header = Self::new_zeroed();
393        header.magic.copy_from_slice(GNU_MAGIC);
394        header.version.copy_from_slice(GNU_VERSION);
395        header
396    }
397}
398
399impl GnuHeader {
400    /// Get the access time in Unix timestamp format.
401    ///
402    /// # Errors
403    ///
404    /// Returns [`HeaderError::InvalidOctal`] if the atime field is not valid.
405    pub fn atime(&self) -> Result<u64> {
406        parse_numeric(&self.atime)
407    }
408
409    /// Set the access time as a Unix timestamp.
410    ///
411    /// Uses octal ASCII if the value fits, otherwise GNU base-256 encoding.
412    /// The 12-byte atime field can represent any `u64`.
413    pub fn set_atime(&mut self, atime: u64) {
414        // 12-byte field has 95 data bits in base-256, more than u64 needs.
415        encode_numeric(&mut self.atime, atime).expect("u64 always fits in 12-byte numeric field");
416    }
417
418    /// Get the change time in Unix timestamp format.
419    ///
420    /// # Errors
421    ///
422    /// Returns [`HeaderError::InvalidOctal`] if the ctime field is not valid.
423    pub fn ctime(&self) -> Result<u64> {
424        parse_numeric(&self.ctime)
425    }
426
427    /// Set the change time as a Unix timestamp.
428    ///
429    /// Uses octal ASCII if the value fits, otherwise GNU base-256 encoding.
430    /// The 12-byte ctime field can represent any `u64`.
431    pub fn set_ctime(&mut self, ctime: u64) {
432        // 12-byte field has 95 data bits in base-256, more than u64 needs.
433        encode_numeric(&mut self.ctime, ctime).expect("u64 always fits in 12-byte numeric field");
434    }
435
436    /// Get the "real size" of a sparse file.
437    ///
438    /// For sparse files, this is the size of the entire file after the sparse
439    /// regions have been filled in. For non-sparse files, this may be zero.
440    ///
441    /// # Errors
442    ///
443    /// Returns [`HeaderError::InvalidOctal`] if the realsize field is not valid.
444    pub fn real_size(&self) -> Result<u64> {
445        parse_numeric(&self.realsize)
446    }
447
448    /// Set the "real size" of a sparse file.
449    ///
450    /// Uses octal ASCII if the value fits, otherwise GNU base-256 encoding.
451    /// The 12-byte realsize field can represent any `u64`.
452    pub fn set_real_size(&mut self, size: u64) {
453        // 12-byte field has 95 data bits in base-256, more than u64 needs.
454        encode_numeric(&mut self.realsize, size).expect("u64 always fits in 12-byte numeric field");
455    }
456
457    /// Returns whether this header will be followed by additional sparse headers.
458    ///
459    /// When true, the next 512-byte block contains a [`GnuExtSparseHeader`].
460    #[must_use]
461    pub fn is_extended(&self) -> bool {
462        self.isextended[0] == 1
463    }
464
465    /// Sets whether this header should be followed by additional sparse headers.
466    pub fn set_is_extended(&mut self, extended: bool) {
467        self.isextended[0] = if extended { 1 } else { 0 };
468    }
469}
470
471impl fmt::Debug for GnuHeader {
472    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
473        f.debug_struct("GnuHeader")
474            .field("name", &String::from_utf8_lossy(truncate_null(&self.name)))
475            .field("mode", &String::from_utf8_lossy(truncate_null(&self.mode)))
476            .field("typeflag", &self.typeflag[0])
477            .field("magic", &self.magic)
478            .field("isextended", &self.isextended[0])
479            .finish_non_exhaustive()
480    }
481}
482
483/// Extended sparse header block for GNU tar.
484///
485/// When a file has more than 4 sparse regions, additional sparse headers
486/// are stored in separate 512-byte blocks following the main header.
487/// Each block contains 21 sparse descriptors plus an `isextended` flag.
488#[derive(Clone, Copy, Default, FromBytes, IntoBytes, Immutable, KnownLayout)]
489#[repr(C)]
490pub struct GnuExtSparseHeader {
491    /// Sparse chunk descriptors (21 entries).
492    pub sparse: [GnuSparseHeader; 21],
493    /// Flag indicating more sparse headers follow.
494    pub isextended: [u8; 1],
495    /// Padding to fill the 512-byte block.
496    pub pad: [u8; 7],
497}
498
499impl GnuExtSparseHeader {
500    /// Returns whether another extension block follows this one.
501    #[must_use]
502    pub fn is_extended(&self) -> bool {
503        self.isextended[0] == 1
504    }
505
506    /// Sets whether another extension block follows this one.
507    pub fn set_is_extended(&mut self, extended: bool) {
508        self.isextended[0] = if extended { 1 } else { 0 };
509    }
510}
511
512impl fmt::Debug for GnuExtSparseHeader {
513    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
514        f.debug_struct("GnuExtSparseHeader")
515            .field("isextended", &self.isextended[0])
516            .finish_non_exhaustive()
517    }
518}
519
520// ============================================================================
521// Entry Type
522// ============================================================================
523
/// Kind of file system object (or extension record) a tar entry describes.
///
/// On disk the type is a single byte in the header. The quoted byte in each
/// variant's description is the value that maps to it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum EntryType {
    /// Regular file: `'0'`, or `'\0'` as written by old tar implementations.
    Regular,
    /// Hard link to another file in the archive: `'1'`.
    Link,
    /// Symbolic link: `'2'`.
    Symlink,
    /// Character device: `'3'`.
    Char,
    /// Block device: `'4'`.
    Block,
    /// Directory: `'5'`.
    Directory,
    /// FIFO (named pipe): `'6'`.
    Fifo,
    /// Contiguous file: `'7'` (rarely used).
    Continuous,
    /// GNU tar long name extension: `'L'`.
    GnuLongName,
    /// GNU tar long link extension: `'K'`.
    GnuLongLink,
    /// GNU tar sparse file: `'S'`.
    GnuSparse,
    /// PAX extended header applying to the next entry: `'x'`.
    XHeader,
    /// PAX global extended header: `'g'`.
    XGlobalHeader,
    /// Any other byte, kept verbatim.
    Other(u8),
}
559
560impl EntryType {
561    // =========================================================================
562    // Constructors
563    // =========================================================================
564
565    /// Create an entry type from a raw byte value.
566    ///
567    /// This is an alias for [`from_byte`](Self::from_byte) provided for
568    /// compatibility with the `tar` crate's API.
569    #[inline]
570    #[must_use]
571    pub fn new(byte: u8) -> Self {
572        Self::from_byte(byte)
573    }
574
575    /// Parse an entry type from a raw byte value.
576    #[must_use]
577    pub fn from_byte(byte: u8) -> Self {
578        match byte {
579            b'0' | b'\0' => EntryType::Regular,
580            b'1' => EntryType::Link,
581            b'2' => EntryType::Symlink,
582            b'3' => EntryType::Char,
583            b'4' => EntryType::Block,
584            b'5' => EntryType::Directory,
585            b'6' => EntryType::Fifo,
586            b'7' => EntryType::Continuous,
587            b'L' => EntryType::GnuLongName,
588            b'K' => EntryType::GnuLongLink,
589            b'S' => EntryType::GnuSparse,
590            b'x' => EntryType::XHeader,
591            b'g' => EntryType::XGlobalHeader,
592            other => EntryType::Other(other),
593        }
594    }
595
596    /// Creates a new entry type representing a regular file.
597    #[must_use]
598    pub fn file() -> Self {
599        Self::Regular
600    }
601
602    /// Creates a new entry type representing a hard link.
603    #[must_use]
604    pub fn hard_link() -> Self {
605        Self::Link
606    }
607
608    /// Creates a new entry type representing a symlink.
609    #[must_use]
610    pub fn symlink() -> Self {
611        Self::Symlink
612    }
613
614    /// Creates a new entry type representing a character device.
615    #[must_use]
616    pub fn character_special() -> Self {
617        Self::Char
618    }
619
620    /// Creates a new entry type representing a block device.
621    #[must_use]
622    pub fn block_special() -> Self {
623        Self::Block
624    }
625
626    /// Creates a new entry type representing a directory.
627    #[must_use]
628    pub fn dir() -> Self {
629        Self::Directory
630    }
631
632    /// Creates a new entry type representing a FIFO (named pipe).
633    #[must_use]
634    pub fn fifo() -> Self {
635        Self::Fifo
636    }
637
638    /// Creates a new entry type representing a contiguous file.
639    #[must_use]
640    pub fn contiguous() -> Self {
641        Self::Continuous
642    }
643
644    // =========================================================================
645    // Conversion
646    // =========================================================================
647
648    /// Return the raw byte representation of this entry type.
649    ///
650    /// This is an alias for [`to_byte`](Self::to_byte) provided for
651    /// compatibility with the `tar` crate's API.
652    #[inline]
653    #[must_use]
654    pub fn as_byte(self) -> u8 {
655        self.to_byte()
656    }
657
658    /// Convert an entry type to its raw byte representation.
659    ///
660    /// Note that `Regular` is encoded as '0', not '\0'.
661    #[must_use]
662    pub fn to_byte(self) -> u8 {
663        match self {
664            EntryType::Regular => b'0',
665            EntryType::Link => b'1',
666            EntryType::Symlink => b'2',
667            EntryType::Char => b'3',
668            EntryType::Block => b'4',
669            EntryType::Directory => b'5',
670            EntryType::Fifo => b'6',
671            EntryType::Continuous => b'7',
672            EntryType::GnuLongName => b'L',
673            EntryType::GnuLongLink => b'K',
674            EntryType::GnuSparse => b'S',
675            EntryType::XHeader => b'x',
676            EntryType::XGlobalHeader => b'g',
677            EntryType::Other(b) => b,
678        }
679    }
680
681    // =========================================================================
682    // Predicates
683    // =========================================================================
684
685    /// Returns true if this is a regular file entry.
686    #[must_use]
687    pub fn is_file(self) -> bool {
688        matches!(self, EntryType::Regular | EntryType::Continuous)
689    }
690
691    /// Returns true if this is a directory entry.
692    #[must_use]
693    pub fn is_dir(self) -> bool {
694        self == EntryType::Directory
695    }
696
697    /// Returns true if this is a symbolic link entry.
698    #[must_use]
699    pub fn is_symlink(self) -> bool {
700        self == EntryType::Symlink
701    }
702
703    /// Returns true if this is a hard link entry.
704    #[must_use]
705    pub fn is_hard_link(self) -> bool {
706        self == EntryType::Link
707    }
708
709    /// Returns true if this is a character device entry.
710    #[must_use]
711    pub fn is_character_special(self) -> bool {
712        self == EntryType::Char
713    }
714
715    /// Returns true if this is a block device entry.
716    #[must_use]
717    pub fn is_block_special(self) -> bool {
718        self == EntryType::Block
719    }
720
721    /// Returns true if this is a FIFO (named pipe) entry.
722    #[must_use]
723    pub fn is_fifo(self) -> bool {
724        self == EntryType::Fifo
725    }
726
727    /// Returns true if this is a contiguous file entry.
728    #[must_use]
729    pub fn is_contiguous(self) -> bool {
730        self == EntryType::Continuous
731    }
732
733    /// Returns true if this is a GNU long name extension entry.
734    #[must_use]
735    pub fn is_gnu_longname(self) -> bool {
736        self == EntryType::GnuLongName
737    }
738
739    /// Returns true if this is a GNU long link extension entry.
740    #[must_use]
741    pub fn is_gnu_longlink(self) -> bool {
742        self == EntryType::GnuLongLink
743    }
744
745    /// Returns true if this is a GNU sparse file entry.
746    #[must_use]
747    pub fn is_gnu_sparse(self) -> bool {
748        self == EntryType::GnuSparse
749    }
750
751    /// Returns true if this is a PAX global extended header entry.
752    #[must_use]
753    pub fn is_pax_global_extensions(self) -> bool {
754        self == EntryType::XGlobalHeader
755    }
756
757    /// Returns true if this is a PAX local extended header entry.
758    #[must_use]
759    pub fn is_pax_local_extensions(self) -> bool {
760        self == EntryType::XHeader
761    }
762}
763
764impl From<u8> for EntryType {
765    fn from(byte: u8) -> Self {
766        Self::from_byte(byte)
767    }
768}
769
770impl From<EntryType> for u8 {
771    fn from(entry_type: EntryType) -> Self {
772        entry_type.to_byte()
773    }
774}
775
776// ============================================================================
777// Header Wrapper
778// ============================================================================
779
780/// High-level tar header wrapper with accessor methods.
781///
782/// This struct wraps a `[u8; 512]` and provides convenient methods for
783/// accessing header fields, detecting the format, and verifying checksums.
784///
785/// # Format Detection
786///
787/// The format is detected by examining the magic field:
788/// - UStar: magic = "ustar\0", version = "00"
789/// - GNU: magic = "ustar ", version = " \0"
790/// - Old: anything else
791///
792/// # Example
793///
794/// ```
795/// use tar_core::Header;
796///
797/// let mut header = Header::new_ustar();
798/// assert!(header.is_ustar());
799/// assert!(!header.is_gnu());
800/// ```
801#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)]
802#[repr(transparent)]
803pub struct Header {
804    bytes: [u8; HEADER_SIZE],
805}
806
807impl Header {
808    /// Create a new header with UStar format magic and version.
809    #[must_use]
810    pub fn new_ustar() -> Self {
811        let mut header = Self {
812            bytes: [0u8; HEADER_SIZE],
813        };
814        let ustar = header.as_ustar_mut();
815        ustar.magic.copy_from_slice(USTAR_MAGIC);
816        ustar.version.copy_from_slice(USTAR_VERSION);
817        header
818    }
819
820    /// Create a new header with GNU tar format magic and version.
821    #[must_use]
822    pub fn new_gnu() -> Self {
823        let mut header = Self {
824            bytes: [0u8; HEADER_SIZE],
825        };
826        let gnu = header.as_gnu_mut();
827        gnu.magic.copy_from_slice(GNU_MAGIC);
828        gnu.version.copy_from_slice(GNU_VERSION);
829        header
830    }
831
832    /// Create a new old-style (V7/POSIX.1-1988) header with no magic bytes.
833    ///
834    /// This header format is the original archive header format which all other
835    /// versions are compatible with (e.g., they are a superset). This header
836    /// format limits path name length and cannot contain extra metadata like
837    /// atime/ctime.
838    #[must_use]
839    pub fn new_old() -> Self {
840        Self {
841            bytes: [0u8; HEADER_SIZE],
842        }
843    }
844
845    /// Get a reference to the underlying bytes.
846    #[must_use]
847    pub fn as_bytes(&self) -> &[u8; HEADER_SIZE] {
848        &self.bytes
849    }
850
851    /// Get a mutable reference to the underlying bytes.
852    pub fn as_mut_bytes(&mut self) -> &mut [u8; HEADER_SIZE] {
853        &mut self.bytes
854    }
855
856    /// Interpret a `[u8; 512]` as a tar header reference.
857    #[must_use]
858    pub fn from_bytes(bytes: &[u8; HEADER_SIZE]) -> &Header {
859        Header::ref_from_bytes(bytes).expect("HEADER_SIZE is correct")
860    }
861
862    /// View this header as an old-style header.
863    #[must_use]
864    pub fn as_old(&self) -> &OldHeader {
865        OldHeader::ref_from_bytes(&self.bytes).expect("size is correct")
866    }
867
868    /// View this header as a UStar header.
869    #[must_use]
870    pub fn as_ustar(&self) -> &UstarHeader {
871        UstarHeader::ref_from_bytes(&self.bytes).expect("size is correct")
872    }
873
874    /// View this header as a GNU header.
875    #[must_use]
876    pub fn as_gnu(&self) -> &GnuHeader {
877        GnuHeader::ref_from_bytes(&self.bytes).expect("size is correct")
878    }
879
880    /// View this header as a UStar header if it has the correct magic.
881    ///
882    /// Returns `None` if this is not a UStar format header.
883    #[must_use]
884    pub fn try_as_ustar(&self) -> Option<&UstarHeader> {
885        if self.is_ustar() {
886            Some(self.as_ustar())
887        } else {
888            None
889        }
890    }
891
892    /// View this header as a GNU header if it has the correct magic.
893    ///
894    /// Returns `None` if this is not a GNU format header.
895    #[must_use]
896    pub fn try_as_gnu(&self) -> Option<&GnuHeader> {
897        if self.is_gnu() {
898            Some(self.as_gnu())
899        } else {
900            None
901        }
902    }
903
904    /// View this header as a mutable old-style header.
905    #[must_use]
906    pub fn as_old_mut(&mut self) -> &mut OldHeader {
907        OldHeader::mut_from_bytes(&mut self.bytes).expect("size is correct")
908    }
909
910    /// View this header as a mutable UStar header.
911    #[must_use]
912    pub fn as_ustar_mut(&mut self) -> &mut UstarHeader {
913        UstarHeader::mut_from_bytes(&mut self.bytes).expect("size is correct")
914    }
915
916    /// View this header as a mutable GNU header.
917    #[must_use]
918    pub fn as_gnu_mut(&mut self) -> &mut GnuHeader {
919        GnuHeader::mut_from_bytes(&mut self.bytes).expect("size is correct")
920    }
921
922    /// View this header as a mutable UStar header if it has the correct magic.
923    ///
924    /// Returns `None` if this is not a UStar format header.
925    #[must_use]
926    pub fn try_as_ustar_mut(&mut self) -> Option<&mut UstarHeader> {
927        if self.is_ustar() {
928            Some(self.as_ustar_mut())
929        } else {
930            None
931        }
932    }
933
934    /// View this header as a mutable GNU header if it has the correct magic.
935    ///
936    /// Returns `None` if this is not a GNU format header.
937    #[must_use]
938    pub fn try_as_gnu_mut(&mut self) -> Option<&mut GnuHeader> {
939        if self.is_gnu() {
940            Some(self.as_gnu_mut())
941        } else {
942            None
943        }
944    }
945
946    /// Check if this header uses UStar format.
947    #[must_use]
948    pub fn is_ustar(&self) -> bool {
949        let h = self.as_ustar();
950        h.magic == *USTAR_MAGIC && h.version == *USTAR_VERSION
951    }
952
953    /// Check if this header uses GNU tar format.
954    #[must_use]
955    pub fn is_gnu(&self) -> bool {
956        let h = self.as_gnu();
957        h.magic == *GNU_MAGIC && h.version == *GNU_VERSION
958    }
959
960    /// Get the entry type.
961    #[must_use]
962    pub fn entry_type(&self) -> EntryType {
963        EntryType::from_byte(self.as_ustar().typeflag[0])
964    }
965
966    /// Get the entry size (file content length) in bytes.
967    ///
968    /// # Errors
969    ///
970    /// Returns [`HeaderError::InvalidOctal`] if the size field is not valid.
971    pub fn entry_size(&self) -> Result<u64> {
972        parse_numeric(&self.as_ustar().size)
973    }
974
975    /// Get the file mode (permissions).
976    ///
977    /// # Errors
978    ///
979    /// Returns [`HeaderError::InvalidOctal`] if the mode field is not valid.
980    pub fn mode(&self) -> Result<u32> {
981        parse_numeric(&self.as_ustar().mode).map(|v| v as u32)
982    }
983
984    /// Get the owner user ID.
985    ///
986    /// # Errors
987    ///
988    /// Returns [`HeaderError::InvalidOctal`] if the uid field is not valid.
989    pub fn uid(&self) -> Result<u64> {
990        parse_numeric(&self.as_ustar().uid)
991    }
992
993    /// Get the owner group ID.
994    ///
995    /// # Errors
996    ///
997    /// Returns [`HeaderError::InvalidOctal`] if the gid field is not valid.
998    pub fn gid(&self) -> Result<u64> {
999        parse_numeric(&self.as_ustar().gid)
1000    }
1001
1002    /// Get the modification time as a Unix timestamp.
1003    ///
1004    /// # Errors
1005    ///
1006    /// Returns [`HeaderError::InvalidOctal`] if the mtime field is not valid.
1007    pub fn mtime(&self) -> Result<u64> {
1008        parse_numeric(&self.as_ustar().mtime)
1009    }
1010
1011    /// Get the raw path bytes from the header.
1012    ///
1013    /// This returns only the name field (bytes 0..100). For UStar format,
1014    /// the prefix field (bytes 345..500) may also contain path components
1015    /// that should be prepended.
1016    #[must_use]
1017    pub fn path_bytes(&self) -> &[u8] {
1018        truncate_null(&self.as_ustar().name)
1019    }
1020
1021    /// Get the raw link name bytes.
1022    #[must_use]
1023    pub fn link_name_bytes(&self) -> &[u8] {
1024        truncate_null(&self.as_ustar().linkname)
1025    }
1026
1027    /// Get the device major number (for character/block devices).
1028    ///
1029    /// Returns `None` for old-style headers without device fields.
1030    ///
1031    /// # Errors
1032    ///
1033    /// Returns [`HeaderError::InvalidOctal`] if the field is not valid octal.
1034    pub fn device_major(&self) -> Result<Option<u32>> {
1035        if !self.is_ustar() && !self.is_gnu() {
1036            return Ok(None);
1037        }
1038        parse_octal(&self.as_ustar().dev_major).map(|v| Some(v as u32))
1039    }
1040
1041    /// Get the device minor number (for character/block devices).
1042    ///
1043    /// Returns `None` for old-style headers without device fields.
1044    ///
1045    /// # Errors
1046    ///
1047    /// Returns [`HeaderError::InvalidOctal`] if the field is not valid octal.
1048    pub fn device_minor(&self) -> Result<Option<u32>> {
1049        if !self.is_ustar() && !self.is_gnu() {
1050            return Ok(None);
1051        }
1052        parse_octal(&self.as_ustar().dev_minor).map(|v| Some(v as u32))
1053    }
1054
1055    /// Get the owner user name.
1056    ///
1057    /// Returns `None` for old-style headers without user/group name fields.
1058    #[must_use]
1059    pub fn username(&self) -> Option<&[u8]> {
1060        if !self.is_ustar() && !self.is_gnu() {
1061            return None;
1062        }
1063        Some(truncate_null(&self.as_ustar().uname))
1064    }
1065
1066    /// Get the owner group name.
1067    ///
1068    /// Returns `None` for old-style headers without user/group name fields.
1069    #[must_use]
1070    pub fn groupname(&self) -> Option<&[u8]> {
1071        if !self.is_ustar() && !self.is_gnu() {
1072            return None;
1073        }
1074        Some(truncate_null(&self.as_ustar().gname))
1075    }
1076
1077    /// Get the UStar prefix field for long paths.
1078    ///
1079    /// Returns `None` for old-style or GNU headers.
1080    #[must_use]
1081    pub fn prefix(&self) -> Option<&[u8]> {
1082        if !self.is_ustar() {
1083            return None;
1084        }
1085        Some(truncate_null(&self.as_ustar().prefix))
1086    }
1087
1088    /// Verify the header checksum.
1089    ///
1090    /// The checksum is computed as the unsigned sum of all header bytes,
1091    /// treating the checksum field (bytes 148..156) as spaces.
1092    ///
1093    /// # Errors
1094    ///
1095    /// Returns [`HeaderError::ChecksumMismatch`] if the checksum is invalid,
1096    /// or [`HeaderError::InvalidOctal`] if the stored checksum cannot be parsed.
1097    pub fn verify_checksum(&self) -> Result<()> {
1098        let expected = parse_octal(&self.as_ustar().cksum)?;
1099        let computed = self.compute_checksum();
1100        if expected == computed {
1101            Ok(())
1102        } else {
1103            Err(HeaderError::ChecksumMismatch { expected, computed })
1104        }
1105    }
1106
1107    /// Compute the header checksum.
1108    ///
1109    /// This computes the unsigned sum of all header bytes, treating the
1110    /// checksum field (bytes 148..156) as spaces (0x20).
1111    #[must_use]
1112    pub fn compute_checksum(&self) -> u64 {
1113        let mut sum: u64 = 0;
1114        for (i, &byte) in self.bytes.iter().enumerate() {
1115            if (148..156).contains(&i) {
1116                // Treat checksum field as spaces
1117                sum += u64::from(b' ');
1118            } else {
1119                sum += u64::from(byte);
1120            }
1121        }
1122        sum
1123    }
1124
1125    /// Check if this header represents an empty block (all zeros).
1126    ///
1127    /// Two consecutive empty blocks mark the end of a tar archive.
1128    #[must_use]
1129    pub fn is_empty(&self) -> bool {
1130        self.bytes.iter().all(|&b| b == 0)
1131    }
1132
1133    // =========================================================================
1134    // Setter Methods
1135    // =========================================================================
1136
1137    /// Format-aware numeric field encoder.
1138    ///
1139    /// For GNU headers, uses `encode_numeric` which falls back to base-256
1140    /// encoding for large values. For ustar (and other formats), uses
1141    /// `encode_octal` only, since base-256 is a GNU extension.
1142    fn set_numeric_field<const N: usize>(
1143        &mut self,
1144        field: impl FnOnce(&mut UstarHeader) -> &mut [u8; N],
1145        value: u64,
1146    ) -> Result<()> {
1147        let is_gnu = self.is_gnu();
1148        let dst = field(self.as_ustar_mut());
1149        if is_gnu {
1150            encode_numeric(dst, value)
1151        } else {
1152            encode_octal(dst, value)
1153        }
1154    }
1155
1156    /// Set the file size (bytes 124-136).
1157    ///
1158    /// For GNU headers, uses octal ASCII if the value fits, otherwise
1159    /// base-256 encoding. For ustar headers, uses octal ASCII only.
1160    ///
1161    /// For values that always fit regardless of format (≤ ~8GB), prefer the
1162    /// infallible [`set_size_small`](Self::set_size_small).
1163    ///
1164    /// # Errors
1165    ///
1166    /// Returns [`HeaderError::FieldOverflow`] if the value cannot be
1167    /// represented. For ustar, the octal limit is 0o77777777777 (8,589,934,591).
1168    /// For GNU, any `u64` fits via base-256.
1169    pub fn set_size(&mut self, size: u64) -> Result<()> {
1170        self.set_numeric_field(|h| &mut h.size, size)
1171    }
1172
1173    /// Set the file size from a `u32` (bytes 124-136).
1174    ///
1175    /// Infallible because any `u32` (max ~4.3 billion) fits in the 12-byte
1176    /// octal field (max 8,589,934,591) regardless of header format.
1177    pub fn set_size_small(&mut self, size: u32) {
1178        encode_octal(&mut self.as_ustar_mut().size, u64::from(size))
1179            .expect("u32 always fits in 12-byte octal field");
1180    }
1181
1182    /// Set the file mode (bytes 100-108).
1183    ///
1184    /// The mode is always written as octal ASCII (both GNU and ustar).
1185    ///
1186    /// For typical Unix modes (≤ 0o7777), prefer the infallible
1187    /// [`set_mode_small`](Self::set_mode_small).
1188    ///
1189    /// # Errors
1190    ///
1191    /// Returns [`HeaderError::FieldOverflow`] if the value exceeds the 8-byte
1192    /// octal capacity (max 0o7777777 = 2,097,151).
1193    pub fn set_mode(&mut self, mode: u32) -> Result<()> {
1194        encode_octal(&mut self.as_ustar_mut().mode, u64::from(mode))
1195    }
1196
1197    /// Set the file mode from a `u16` (bytes 100-108).
1198    ///
1199    /// Infallible because any `u16` (max 65,535) fits in the 8-byte octal
1200    /// field (max 2,097,151). Covers all standard Unix permission bits
1201    /// (0o7777).
1202    pub fn set_mode_small(&mut self, mode: u16) {
1203        encode_octal(&mut self.as_ustar_mut().mode, u64::from(mode))
1204            .expect("u16 always fits in 8-byte octal field");
1205    }
1206
1207    /// Set the owner user ID (bytes 108-116).
1208    ///
1209    /// For GNU headers, uses base-256 encoding for values that exceed the
1210    /// octal range. For ustar headers, only octal ASCII is available.
1211    ///
1212    /// # Errors
1213    ///
1214    /// Returns [`HeaderError::FieldOverflow`] if the value cannot be
1215    /// represented. For ustar, the octal limit is 0o7777777 (2,097,151).
1216    /// For GNU, the base-256 limit is 2^63 - 1.
1217    pub fn set_uid(&mut self, uid: u64) -> Result<()> {
1218        self.set_numeric_field(|h| &mut h.uid, uid)
1219    }
1220
1221    /// Set the owner group ID (bytes 116-124).
1222    ///
1223    /// For GNU headers, uses base-256 encoding for values that exceed the
1224    /// octal range. For ustar headers, only octal ASCII is available.
1225    ///
1226    /// # Errors
1227    ///
1228    /// Returns [`HeaderError::FieldOverflow`] if the value cannot be
1229    /// represented. For ustar, the octal limit is 0o7777777 (2,097,151).
1230    /// For GNU, the base-256 limit is 2^63 - 1.
1231    pub fn set_gid(&mut self, gid: u64) -> Result<()> {
1232        self.set_numeric_field(|h| &mut h.gid, gid)
1233    }
1234
1235    // Note: no _small variants for uid/gid — u32 values can exceed the
1236    // ustar octal limit (2,097,151), so they're not format-independent.
1237    // Use PAX extensions (via EntryBuilder) for large IDs on ustar.
1238
1239    /// Set the modification time as a Unix timestamp (bytes 136-148).
1240    ///
1241    /// For GNU headers, uses octal ASCII if the value fits, otherwise
1242    /// base-256 encoding. For ustar headers, uses octal ASCII only.
1243    ///
1244    /// For timestamps that always fit regardless of format (≤ ~2106), prefer
1245    /// the infallible [`set_mtime_small`](Self::set_mtime_small).
1246    ///
1247    /// # Errors
1248    ///
1249    /// Returns [`HeaderError::FieldOverflow`] if the value cannot be
1250    /// represented. For ustar, the octal limit is 0o77777777777 (8,589,934,591).
1251    /// For GNU, any `u64` fits via base-256.
1252    pub fn set_mtime(&mut self, mtime: u64) -> Result<()> {
1253        self.set_numeric_field(|h| &mut h.mtime, mtime)
1254    }
1255
1256    /// Set the modification time from a `u32` Unix timestamp (bytes 136-148).
1257    ///
1258    /// Infallible because any `u32` (max ~4.3 billion, i.e. year ~2106) fits
1259    /// in the 12-byte octal field regardless of header format.
1260    pub fn set_mtime_small(&mut self, mtime: u32) {
1261        encode_octal(&mut self.as_ustar_mut().mtime, u64::from(mtime))
1262            .expect("u32 always fits in 12-byte octal field");
1263    }
1264
1265    /// Set the entry type (byte 156).
1266    pub fn set_entry_type(&mut self, ty: EntryType) {
1267        self.as_ustar_mut().typeflag[0] = ty.to_byte();
1268    }
1269
1270    /// Compute and set the header checksum (bytes 148-156).
1271    ///
1272    /// This should be called after all other header fields have been set.
1273    /// The format is 7 octal digits with leading zeros plus a null terminator,
1274    /// matching tar-rs for bit-identical output.
1275    pub fn set_checksum(&mut self) {
1276        // Fill checksum field with spaces for calculation
1277        self.as_ustar_mut().cksum.fill(b' ');
1278
1279        // Compute unsigned sum of all bytes
1280        let checksum: u64 = self.bytes.iter().map(|&b| u64::from(b)).sum();
1281
1282        // Max checksum = 512 * 255 = 130560, which always fits in 8-byte octal
1283        // (max representable: 07777777 = 2097151).
1284        encode_octal(&mut self.as_ustar_mut().cksum, checksum)
1285            .expect("checksum always fits in 8-byte octal field");
1286    }
1287
1288    /// Set the file path (name field, bytes 0-100).
1289    ///
1290    /// # Errors
1291    ///
1292    /// Returns an error if the path is longer than 100 bytes.
1293    pub fn set_path(&mut self, path: &[u8]) -> Result<()> {
1294        if path.len() > self.as_ustar().name.len() {
1295            return Err(HeaderError::FieldOverflow {
1296                field_len: self.as_ustar().name.len(),
1297                detail: format!("{}-byte path", path.len()),
1298            });
1299        }
1300        let name = &mut self.as_ustar_mut().name;
1301        name.fill(0);
1302        name[..path.len()].copy_from_slice(path);
1303        Ok(())
1304    }
1305
1306    /// Set the link name (bytes 157-257).
1307    ///
1308    /// # Errors
1309    ///
1310    /// Returns an error if the link name is longer than 100 bytes.
1311    pub fn set_link_name(&mut self, link: &[u8]) -> Result<()> {
1312        if link.len() > self.as_ustar().linkname.len() {
1313            return Err(HeaderError::FieldOverflow {
1314                field_len: self.as_ustar().linkname.len(),
1315                detail: format!("{}-byte link name", link.len()),
1316            });
1317        }
1318        let linkname = &mut self.as_ustar_mut().linkname;
1319        linkname.fill(0);
1320        linkname[..link.len()].copy_from_slice(link);
1321        Ok(())
1322    }
1323
1324    /// Set the owner user name (bytes 265-297, UStar/GNU only).
1325    ///
1326    /// # Errors
1327    ///
1328    /// Returns an error if the username is longer than 32 bytes.
1329    pub fn set_username(&mut self, name: &[u8]) -> Result<()> {
1330        if name.len() > self.as_ustar().uname.len() {
1331            return Err(HeaderError::FieldOverflow {
1332                field_len: self.as_ustar().uname.len(),
1333                detail: format!("{}-byte username", name.len()),
1334            });
1335        }
1336        let uname = &mut self.as_ustar_mut().uname;
1337        uname.fill(0);
1338        uname[..name.len()].copy_from_slice(name);
1339        Ok(())
1340    }
1341
1342    /// Set the owner group name (bytes 297-329, UStar/GNU only).
1343    ///
1344    /// # Errors
1345    ///
1346    /// Returns an error if the group name is longer than 32 bytes.
1347    pub fn set_groupname(&mut self, name: &[u8]) -> Result<()> {
1348        if name.len() > self.as_ustar().gname.len() {
1349            return Err(HeaderError::FieldOverflow {
1350                field_len: self.as_ustar().gname.len(),
1351                detail: format!("{}-byte group name", name.len()),
1352            });
1353        }
1354        let gname = &mut self.as_ustar_mut().gname;
1355        gname.fill(0);
1356        gname[..name.len()].copy_from_slice(name);
1357        Ok(())
1358    }
1359
1360    /// Set device major and minor numbers (bytes 329-337 and 337-345).
1361    ///
1362    /// Used for character and block device entries.
1363    ///
1364    /// # Errors
1365    ///
1366    /// Returns [`HeaderError::FieldOverflow`] if either value cannot be
1367    /// represented in its 8-byte octal field (max 0o7777777 = 2097151).
1368    /// For device numbers that fit in `u16`, prefer the infallible
1369    /// [`set_device_small`](Self::set_device_small).
1370    pub fn set_device(&mut self, major: u32, minor: u32) -> Result<()> {
1371        let fields = self.as_ustar_mut();
1372        encode_octal(&mut fields.dev_major, u64::from(major))?;
1373        encode_octal(&mut fields.dev_minor, u64::from(minor))
1374    }
1375
1376    /// Set device major and minor numbers from `u16` values.
1377    ///
1378    /// Infallible because any `u16` (max 65535) fits in the 8-byte octal
1379    /// fields (max 2097151). Covers all real-world device numbers.
1380    pub fn set_device_small(&mut self, major: u16, minor: u16) {
1381        let fields = self.as_ustar_mut();
1382        encode_octal(&mut fields.dev_major, u64::from(major))
1383            .expect("u16 always fits in 8-byte octal field");
1384        encode_octal(&mut fields.dev_minor, u64::from(minor))
1385            .expect("u16 always fits in 8-byte octal field");
1386    }
1387}
1388
1389impl Default for Header {
1390    fn default() -> Self {
1391        Self::new_ustar()
1392    }
1393}
1394
1395impl fmt::Debug for Header {
1396    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1397        f.debug_struct("Header")
1398            .field("path", &String::from_utf8_lossy(self.path_bytes()))
1399            .field("entry_type", &self.entry_type())
1400            .field("size", &self.entry_size().ok())
1401            .field("mode", &self.mode().ok().map(|m| format!("{m:04o}")))
1402            .field("is_ustar", &self.is_ustar())
1403            .field("is_gnu", &self.is_gnu())
1404            .finish()
1405    }
1406}
1407
1408// ============================================================================
1409// Helper Functions
1410// ============================================================================
1411
/// Allocation-free octal formatter for `u64` values.
///
/// The digits are written right-aligned into a fixed buffer. The largest
/// `u64` (2^64 - 1) needs 22 octal digits, so the buffer can never be too
/// small.
///
/// This is the octal counterpart of [`builder::DecU64`](crate::builder).
pub(crate) struct OctU64 {
    /// Digit storage; only `buf[start..]` is meaningful.
    buf: [u8; 22],
    /// Index of the most significant digit inside `buf`.
    start: u8,
}

impl OctU64 {
    /// Render `value` as ASCII octal digits without leading zeros.
    ///
    /// Zero is rendered as the single digit `0`.
    pub(crate) fn new(mut value: u64) -> Self {
        let mut buf = [0u8; 22];
        let mut cursor = buf.len();
        // Emit at least one digit so that zero still produces "0".
        loop {
            cursor -= 1;
            buf[cursor] = b'0' + (value % 8) as u8;
            value /= 8;
            if value == 0 {
                break;
            }
        }
        Self {
            buf,
            // `cursor` is at most 21 here, so the narrowing is lossless.
            start: cursor as u8,
        }
    }

    /// Borrow the rendered digits, most significant first.
    pub(crate) fn as_bytes(&self) -> &[u8] {
        let (_unused, digits) = self.buf.split_at(usize::from(self.start));
        digits
    }
}
1446
/// Decide whether a byte counts as whitespace inside a tar header field.
///
/// The accepted set is everything `u8::is_ascii_whitespace()` recognizes
/// (HT, LF, FF, CR, space) **plus** vertical tab (0x0b). Rust's
/// `is_ascii_whitespace` follows the WHATWG definition, which leaves VT out,
/// but real tar implementations (and Rust's `str::trim()`) treat it as
/// whitespace. Without it, fields like `"0000000\x0b"` would fail to parse.
fn is_tar_whitespace(b: u8) -> bool {
    // HT (0x09) through CR (0x0d) is a contiguous range that includes VT.
    matches!(b, b'\t'..=b'\r' | b' ')
}
1458
1459/// Parse an octal ASCII field into a u64.
1460///
1461/// Octal fields in tar headers are ASCII strings with optional leading
1462/// spaces and trailing spaces or null bytes. For example:
1463/// - `"0000644\0"` -> 420 (file mode 0644)
1464/// - `"     123 "` -> 83
1465///
1466/// # Errors
1467///
1468/// Returns [`HeaderError::InvalidOctal`] if the field contains invalid
1469/// characters (anything other than spaces, digits 0-7, or null bytes).
1470pub(crate) fn parse_octal(bytes: &[u8]) -> Result<u64> {
1471    // Tar octal fields are padded with leading spaces/nulls and terminated
1472    // by spaces, tabs, or null bytes. We first truncate at the first null
1473    // (matching how C-string fields work in tar), then trim whitespace from
1474    // both ends to isolate the digit run.
1475    //
1476    // Note: we use `is_tar_whitespace` rather than `u8::is_ascii_whitespace`
1477    // because the latter omits vertical tab (0x0b), which real tar
1478    // implementations treat as whitespace (and Rust's `str::trim()` strips).
1479    let truncated = match bytes.iter().position(|&b| b == 0) {
1480        Some(i) => &bytes[..i],
1481        None => bytes,
1482    };
1483    let trimmed = truncated
1484        .iter()
1485        .position(|&b| !is_tar_whitespace(b))
1486        .map(|start| {
1487            let rest = &truncated[start..];
1488            let end = rest
1489                .iter()
1490                .rposition(|&b| !is_tar_whitespace(b))
1491                .map_or(0, |p| p + 1);
1492            &rest[..end]
1493        })
1494        .unwrap_or(&[]);
1495
1496    if trimmed.is_empty() {
1497        return Ok(0);
1498    }
1499
1500    let s = core::str::from_utf8(trimmed).map_err(|_| HeaderError::InvalidOctal(bytes.to_vec()))?;
1501    u64::from_str_radix(s, 8).map_err(|_| HeaderError::InvalidOctal(bytes.to_vec()))
1502}
1503
1504/// Encode a u64 value to a numeric field.
1505///
1506/// Uses octal ASCII if the value fits, otherwise GNU base-256 encoding
1507/// (high bit set in first byte). This matches tar-rs behavior for
1508/// compatibility.
1509///
1510/// # Thresholds
1511///
1512/// - For 12-byte fields (size, mtime): uses base-256 if value >= 8589934592 (8GB)
1513/// - For 8-byte fields (uid, gid): uses base-256 if value >= 2097152 (2^21)
1514///
1515/// # Errors
1516///
1517/// Returns [`HeaderError::FieldOverflow`] if the value exceeds the field's
1518/// representable range (e.g., values >= 2^63 in an 8-byte field).
1519pub(crate) fn encode_numeric<const N: usize>(field: &mut [u8; N], value: u64) -> Result<()> {
1520    const { assert!(N > 0, "encode_numeric requires N > 0") };
1521
1522    // Thresholds from tar-rs: use binary for large values
1523    let use_binary = if N == 8 {
1524        value >= 2097152 // 2^21, max for 7 octal digits
1525    } else {
1526        value >= 8589934592 // 8GB, threshold for 12-byte fields
1527    };
1528
1529    if use_binary {
1530        // GNU base-256 encoding: high bit of first byte is the indicator,
1531        // leaving N*8-1 data bits. For 8-byte fields that's 63 bits, for
1532        // 12-byte fields it's 95 bits (more than u64 needs).
1533        let data_bits = N * 8 - 1;
1534        if data_bits < 64 && value >= (1u64 << data_bits) {
1535            return Err(HeaderError::FieldOverflow {
1536                field_len: N,
1537                detail: format!("numeric value {value}"),
1538            });
1539        }
1540
1541        field.fill(0);
1542
1543        // Write the value in big-endian to the last 8 bytes (or fewer)
1544        let value_bytes = value.to_be_bytes();
1545        if N >= 8 {
1546            field[N - 8..].copy_from_slice(&value_bytes);
1547        } else {
1548            field.copy_from_slice(&value_bytes[8 - N..]);
1549        }
1550        // Set high bit to indicate base-256
1551        field[0] |= 0x80;
1552    } else {
1553        // Standard octal ASCII encoding
1554        encode_octal(field, value)?;
1555    }
1556
1557    Ok(())
1558}
1559
1560/// Encode a u64 value as octal ASCII into a tar header field.
1561///
1562/// The field is zero-filled, then populated with leading-zero-padded octal
1563/// digits followed by a null terminator, matching tar conventions (e.g.
1564/// mode 0644 in an 8-byte field becomes `b"0000644\0"`).
1565///
1566/// Uses [`OctU64`] internally for the digit conversion.
1567///
1568/// # Errors
1569///
1570/// Returns [`HeaderError::FieldOverflow`] if the value cannot fit.
1571pub(crate) fn encode_octal<const N: usize>(field: &mut [u8; N], value: u64) -> Result<()> {
1572    const { assert!(N > 0, "encode_octal requires N > 0") };
1573
1574    let oct = OctU64::new(value);
1575    let digits = oct.as_bytes();
1576
1577    // N-1 digit slots available (last byte is null terminator).
1578    if digits.len() > N - 1 {
1579        return Err(HeaderError::FieldOverflow {
1580            field_len: N,
1581            detail: format!("octal value {value:#o}"),
1582        });
1583    }
1584
1585    // Zero-fill first, then overwrite with '0'-padded digits.
1586    field.fill(0);
1587    let (digit_slots, _nul) = field.split_at_mut(N - 1);
1588    let pad = digit_slots.len() - digits.len();
1589    digit_slots[..pad].fill(b'0');
1590    digit_slots[pad..].copy_from_slice(digits);
1591
1592    Ok(())
1593}
1594
1595/// Parse a numeric field that may be octal ASCII or GNU base-256 encoded.
1596///
1597/// GNU tar uses base-256 encoding for values that don't fit in octal.
1598/// When the high bit of the first byte is set (0x80), the value is stored
1599/// as big-endian binary in the remaining bytes. Otherwise, it's parsed as
1600/// octal ASCII.
1601///
1602/// # Errors
1603///
1604/// Returns [`HeaderError::InvalidOctal`] if octal parsing fails.
1605pub(crate) fn parse_numeric(bytes: &[u8]) -> Result<u64> {
1606    if bytes.is_empty() {
1607        return Ok(0);
1608    }
1609
1610    // Check for GNU base-256 encoding (high bit set)
1611    if bytes[0] & 0x80 != 0 {
1612        // Base-256: interpret remaining bytes as big-endian, masking off the
1613        // high bit of the first byte
1614        let mut value: u64 = 0;
1615        for (i, &byte) in bytes.iter().enumerate() {
1616            let b = if i == 0 { byte & 0x7f } else { byte };
1617            value = value
1618                .checked_shl(8)
1619                .and_then(|v| v.checked_add(u64::from(b)))
1620                .ok_or_else(|| HeaderError::InvalidOctal(bytes.to_vec()))?;
1621        }
1622        Ok(value)
1623    } else {
1624        // Standard octal ASCII
1625        parse_octal(bytes)
1626    }
1627}
1628
/// Return the part of `bytes` that precedes the first null byte.
///
/// This extracts null-terminated strings from fixed-size header fields.
/// When the slice holds no null byte it is returned unchanged.
#[must_use]
pub(crate) fn truncate_null(bytes: &[u8]) -> &[u8] {
    let end = bytes
        .iter()
        .position(|&byte| byte == 0)
        .unwrap_or(bytes.len());
    &bytes[..end]
}
1640
1641// ============================================================================
1642// PAX Extended Headers
1643// ============================================================================
1644
/// PAX extended header key for the file path (`path`).
pub const PAX_PATH: &str = "path";
/// PAX extended header key for the link target path (`linkpath`).
pub const PAX_LINKPATH: &str = "linkpath";
/// PAX extended header key for file size (`size`).
pub const PAX_SIZE: &str = "size";
/// PAX extended header key for owner user ID (`uid`).
pub const PAX_UID: &str = "uid";
/// PAX extended header key for owner group ID (`gid`).
pub const PAX_GID: &str = "gid";
/// PAX extended header key for owner user name (`uname`).
pub const PAX_UNAME: &str = "uname";
/// PAX extended header key for owner group name (`gname`).
pub const PAX_GNAME: &str = "gname";
/// PAX extended header key for modification time (`mtime`).
pub const PAX_MTIME: &str = "mtime";
/// PAX extended header key for access time (`atime`).
pub const PAX_ATIME: &str = "atime";
/// PAX extended header key for change time (`ctime`).
pub const PAX_CTIME: &str = "ctime";
/// PAX extended header prefix for SCHILY extended attributes.
///
/// This is a key prefix (note the trailing dot), not a complete key.
pub const PAX_SCHILY_XATTR: &str = "SCHILY.xattr.";

/// PAX extended header prefix for GNU sparse file extensions.
///
/// This is a key prefix (note the trailing dot), not a complete key; the
/// `PAX_GNU_SPARSE_*` constants below all start with it.
pub const PAX_GNU_SPARSE: &str = "GNU.sparse.";
/// PAX key for GNU sparse file number of blocks.
pub const PAX_GNU_SPARSE_NUMBLOCKS: &str = "GNU.sparse.numblocks";
/// PAX key for GNU sparse file offset.
pub const PAX_GNU_SPARSE_OFFSET: &str = "GNU.sparse.offset";
/// PAX key for GNU sparse file numbytes.
pub const PAX_GNU_SPARSE_NUMBYTES: &str = "GNU.sparse.numbytes";
/// PAX key for GNU sparse file map.
pub const PAX_GNU_SPARSE_MAP: &str = "GNU.sparse.map";
/// PAX key for GNU sparse file name.
pub const PAX_GNU_SPARSE_NAME: &str = "GNU.sparse.name";
/// PAX key for GNU sparse file format major version.
pub const PAX_GNU_SPARSE_MAJOR: &str = "GNU.sparse.major";
/// PAX key for GNU sparse file format minor version.
pub const PAX_GNU_SPARSE_MINOR: &str = "GNU.sparse.minor";
/// PAX key for GNU sparse file size.
pub const PAX_GNU_SPARSE_SIZE: &str = "GNU.sparse.size";
/// PAX key for GNU sparse file real size.
pub const PAX_GNU_SPARSE_REALSIZE: &str = "GNU.sparse.realsize";
1688
/// Error parsing a PAX extension record.
///
/// With the `std` feature enabled, this error converts into
/// `std::io::Error`.
#[derive(Debug, Error)]
pub enum PaxError {
    /// The record format is malformed.
    #[error("malformed PAX extension record")]
    Malformed,
    /// The key is not valid UTF-8.
    ///
    /// Wraps the underlying [`core::str::Utf8Error`], which is also the
    /// source of the automatic `From` conversion.
    #[error("PAX key is not valid UTF-8: {0}")]
    InvalidKey(#[from] core::str::Utf8Error),
}
1699
#[cfg(feature = "std")]
impl From<PaxError> for std::io::Error {
    // Only the rendered message is carried over into the I/O error.
    fn from(err: PaxError) -> Self {
        let message = err.to_string();
        Self::other(message)
    }
}
1706
/// One key/value record from a PAX extended header.
///
/// Both parts borrow from the underlying record data; nothing is copied or
/// validated until one of the string accessors is called.
#[derive(Debug, Clone)]
pub struct PaxExtension<'a> {
    key: &'a [u8],
    value: &'a [u8],
}

impl<'a> PaxExtension<'a> {
    /// Decode the key as UTF-8 text.
    ///
    /// # Errors
    ///
    /// Returns an error if the key is not valid UTF-8.
    pub fn key(&self) -> core::result::Result<&'a str, core::str::Utf8Error> {
        let raw: &'a [u8] = self.key;
        core::str::from_utf8(raw)
    }

    /// Borrow the key exactly as stored, without UTF-8 validation.
    #[must_use]
    pub fn key_bytes(&self) -> &'a [u8] {
        let raw: &'a [u8] = self.key;
        raw
    }

    /// Decode the value as UTF-8 text.
    ///
    /// # Errors
    ///
    /// Returns an error if the value is not valid UTF-8.
    pub fn value(&self) -> core::result::Result<&'a str, core::str::Utf8Error> {
        let raw: &'a [u8] = self.value;
        core::str::from_utf8(raw)
    }

    /// Borrow the value exactly as stored, without UTF-8 validation.
    #[must_use]
    pub fn value_bytes(&self) -> &'a [u8] {
        let raw: &'a [u8] = self.value;
        raw
    }
}
1745
1746/// Iterator over PAX extended header records.
1747///
1748/// PAX extended headers consist of records in the format:
1749/// `<length> <key>=<value>\n`
1750///
1751/// where `<length>` is the total record length including the length field itself.
1752///
1753/// # Example
1754///
1755/// ```
1756/// use tar_core::PaxExtensions;
1757///
1758/// let data = b"20 path=foo/bar.txt\n";
1759/// let mut iter = PaxExtensions::new(data);
1760/// let ext = iter.next().unwrap().unwrap();
1761/// assert_eq!(ext.key().unwrap(), "path");
1762/// assert_eq!(ext.value().unwrap(), "foo/bar.txt");
1763/// ```
1764#[derive(Debug)]
1765pub struct PaxExtensions<'a> {
1766    data: &'a [u8],
1767}
1768
1769impl<'a> PaxExtensions<'a> {
1770    /// Create a new iterator over PAX extension records.
1771    #[must_use]
1772    pub fn new(data: &'a [u8]) -> Self {
1773        Self { data }
1774    }
1775
1776    /// Look up a specific key and return its value as a string.
1777    ///
1778    /// Returns `None` if the key is not found or if parsing fails.
1779    #[must_use]
1780    pub fn get(&self, key: &str) -> Option<&'a str> {
1781        for ext in PaxExtensions::new(self.data).flatten() {
1782            if ext.key().ok() == Some(key) {
1783                return ext.value().ok();
1784            }
1785        }
1786        None
1787    }
1788
1789    /// Look up a specific key and parse its value as u64.
1790    ///
1791    /// Returns `None` if the key is not found, parsing fails, or the value
1792    /// is not a valid integer.
1793    #[must_use]
1794    pub fn get_u64(&self, key: &str) -> Option<u64> {
1795        self.get(key).and_then(|v| v.parse().ok())
1796    }
1797}
1798
1799impl<'a> Iterator for PaxExtensions<'a> {
1800    type Item = core::result::Result<PaxExtension<'a>, PaxError>;
1801
1802    fn next(&mut self) -> Option<Self::Item> {
1803        if self.data.is_empty() {
1804            return None;
1805        }
1806
1807        // Format: "<len> <key>=<value>\n"
1808        // Split off the decimal length field at the first space.
1809        let (len_bytes, _) = self
1810            .data
1811            .split_at(self.data.iter().position(|&b| b == b' ')?);
1812        let len: usize = core::str::from_utf8(len_bytes).ok()?.parse().ok()?;
1813
1814        // The record is exactly `len` bytes (including the length field itself).
1815        let record = match self.data.get(..len) {
1816            Some(r) => r,
1817            None => return Some(Err(PaxError::Malformed)),
1818        };
1819
1820        // Must end with newline.
1821        if record.last() != Some(&b'\n') {
1822            return Some(Err(PaxError::Malformed));
1823        }
1824
1825        // Everything between the space and the trailing newline is "key=value".
1826        // `len_bytes.len() + 1` skips past the space; strip the trailing '\n'.
1827        let kv = match record.get(len_bytes.len() + 1..record.len() - 1) {
1828            Some(kv) => kv,
1829            None => return Some(Err(PaxError::Malformed)),
1830        };
1831
1832        // Split key and value at the first '='.  Values may contain '='
1833        // so we only split on the first one.
1834        let Some(eq_pos) = kv.iter().position(|&b| b == b'=') else {
1835            return Some(Err(PaxError::Malformed));
1836        };
1837        let (key, value) = (&kv[..eq_pos], &kv[eq_pos + 1..]);
1838
1839        // Advance past this record.
1840        self.data = &self.data[len..];
1841
1842        Some(Ok(PaxExtension { key, value }))
1843    }
1844}
1845
1846#[cfg(test)]
1847mod tests {
1848    use super::*;
1849
1850    #[test]
1851    fn test_header_size() {
1852        assert_eq!(size_of::<OldHeader>(), HEADER_SIZE);
1853        assert_eq!(size_of::<UstarHeader>(), HEADER_SIZE);
1854        assert_eq!(size_of::<GnuHeader>(), HEADER_SIZE);
1855        assert_eq!(size_of::<GnuExtSparseHeader>(), HEADER_SIZE);
1856        assert_eq!(size_of::<Header>(), HEADER_SIZE);
1857    }
1858
1859    #[test]
1860    fn test_sparse_header_size() {
1861        // Each sparse header is 24 bytes (12 + 12)
1862        assert_eq!(size_of::<GnuSparseHeader>(), 24);
1863        // Extended sparse: 21 * 24 + 1 + 7 = 512
1864        assert_eq!(21 * 24 + 1 + 7, HEADER_SIZE);
1865    }
1866
1867    #[test]
1868    fn test_new_ustar() {
1869        let header = Header::new_ustar();
1870        assert!(header.is_ustar());
1871        assert!(!header.is_gnu());
1872    }
1873
1874    #[test]
1875    fn test_new_gnu() {
1876        let header = Header::new_gnu();
1877        assert!(header.is_gnu());
1878        assert!(!header.is_ustar());
1879    }
1880
1881    #[test]
1882    fn test_parse_octal() {
1883        let cases: &[(&[u8], u64)] = &[
1884            (b"0000644\0", 0o644),
1885            (b"0000755\0", 0o755),
1886            (b"     123 ", 0o123),
1887            (b"0", 0),
1888            (b"", 0),
1889            (b"   \0\0\0", 0),
1890            (b"        ", 0),
1891            (b"\0\0\0\0\0\0", 0),
1892            (b"      7\0", 7),
1893            (b"0000755", 0o755),
1894            (b"7", 7),
1895            (b"00000001", 1),
1896            (b"77777777777\0", 0o77777777777),
1897            (b"7777777\0", 0o7777777),
1898        ];
1899        for (input, expected) in cases {
1900            assert_eq!(
1901                parse_octal(input).unwrap(),
1902                *expected,
1903                "parse_octal({input:?})"
1904            );
1905        }
1906
1907        for bad in [&b"abc"[..], b"128"] {
1908            assert!(parse_octal(bad).is_err(), "should reject {bad:?}");
1909        }
1910    }
1911
1912    #[test]
1913    fn test_truncate_null() {
1914        let cases: &[(&[u8], &[u8])] = &[
1915            (b"hello\0world", b"hello"),
1916            (b"no null", b"no null"),
1917            (b"\0start", b""),
1918            (b"", b""),
1919        ];
1920        for (input, expected) in cases {
1921            assert_eq!(truncate_null(input), *expected, "truncate_null({input:?})");
1922        }
1923    }
1924
1925    #[test]
1926    fn test_entry_type_roundtrip() {
1927        // Every known type should survive a byte round-trip.
1928        let types = [
1929            (b'0', EntryType::Regular),
1930            (b'\0', EntryType::Regular), // Old tar convention
1931            (b'1', EntryType::Link),
1932            (b'2', EntryType::Symlink),
1933            (b'3', EntryType::Char),
1934            (b'4', EntryType::Block),
1935            (b'5', EntryType::Directory),
1936            (b'6', EntryType::Fifo),
1937            (b'7', EntryType::Continuous),
1938            (b'L', EntryType::GnuLongName),
1939            (b'K', EntryType::GnuLongLink),
1940            (b'S', EntryType::GnuSparse),
1941            (b'x', EntryType::XHeader),
1942            (b'g', EntryType::XGlobalHeader),
1943        ];
1944        for (byte, expected) in types {
1945            let parsed = EntryType::from_byte(byte);
1946            assert_eq!(parsed, expected, "from_byte({byte:#x})");
1947            // Non-alias types should round-trip through to_byte.
1948            if byte != b'\0' {
1949                assert_eq!(parsed.to_byte(), byte);
1950            }
1951        }
1952    }
1953
1954    #[test]
1955    fn test_entry_type_predicates() {
1956        let cases: &[(EntryType, bool, bool, bool, bool)] = &[
1957            //                       file   dir    sym    hard
1958            (EntryType::Regular, true, false, false, false),
1959            (EntryType::Continuous, true, false, false, false),
1960            (EntryType::Directory, false, true, false, false),
1961            (EntryType::Symlink, false, false, true, false),
1962            (EntryType::Link, false, false, false, true),
1963            (EntryType::Char, false, false, false, false),
1964        ];
1965        for &(ty, file, dir, sym, hard) in cases {
1966            assert_eq!(ty.is_file(), file, "{ty:?}.is_file()");
1967            assert_eq!(ty.is_dir(), dir, "{ty:?}.is_dir()");
1968            assert_eq!(ty.is_symlink(), sym, "{ty:?}.is_symlink()");
1969            assert_eq!(ty.is_hard_link(), hard, "{ty:?}.is_hard_link()");
1970        }
1971    }
1972
1973    #[test]
1974    fn test_checksum_empty_header() {
1975        let header = Header::new_ustar();
1976        // Computed checksum should be consistent
1977        let checksum = header.compute_checksum();
1978        // For an empty header with only magic/version set, checksum includes:
1979        // - 148 spaces (0x20) for checksum field = 148 * 32 = 4736
1980        // - "ustar\0" = 117+115+116+97+114+0 = 559
1981        // - "00" = 48+48 = 96
1982        // - Rest are zeros
1983        assert!(checksum > 0);
1984    }
1985
1986    #[test]
1987    fn test_is_empty() {
1988        let mut header = Header::new_ustar();
1989        assert!(!header.is_empty());
1990
1991        // Create truly empty header
1992        header.as_mut_bytes().fill(0);
1993        assert!(header.is_empty());
1994    }
1995
1996    #[test]
1997    fn test_as_format_views() {
1998        let header = Header::new_ustar();
1999
2000        // All views should work without panicking
2001        let _old = header.as_old();
2002        let _ustar = header.as_ustar();
2003        let _gnu = header.as_gnu();
2004    }
2005
2006    #[test]
2007    fn test_ustar_default_magic() {
2008        let ustar = UstarHeader::default();
2009        assert_eq!(&ustar.magic, USTAR_MAGIC);
2010        assert_eq!(&ustar.version, USTAR_VERSION);
2011    }
2012
2013    #[test]
2014    fn test_gnu_default_magic() {
2015        let gnu = GnuHeader::default();
2016        assert_eq!(&gnu.magic, GNU_MAGIC);
2017        assert_eq!(&gnu.version, GNU_VERSION);
2018    }
2019
2020    #[test]
2021    fn test_path_bytes() {
2022        let mut header = Header::new_ustar();
2023        header.as_mut_bytes()[0..5].copy_from_slice(b"hello");
2024        assert_eq!(header.path_bytes(), b"hello");
2025    }
2026
2027    #[test]
2028    fn test_link_name_bytes() {
2029        let mut header = Header::new_ustar();
2030        header.as_mut_bytes()[157..163].copy_from_slice(b"target");
2031        assert_eq!(header.link_name_bytes(), b"target");
2032    }
2033
2034    #[test]
2035    fn test_username_groupname() {
2036        let header = Header::new_ustar();
2037        assert!(header.username().is_some());
2038        assert!(header.groupname().is_some());
2039
2040        // Old-style header should return None
2041        let mut old_header = Header::new_ustar();
2042        old_header.as_mut_bytes()[257..265].fill(0);
2043        assert!(old_header.username().is_none());
2044        assert!(old_header.groupname().is_none());
2045    }
2046
2047    #[test]
2048    fn test_prefix() {
2049        let header = Header::new_ustar();
2050        assert!(header.prefix().is_some());
2051
2052        let gnu_header = Header::new_gnu();
2053        // GNU format doesn't use prefix the same way
2054        assert!(gnu_header.prefix().is_none());
2055    }
2056
2057    #[test]
2058    fn test_device_numbers() {
2059        let header = Header::new_ustar();
2060        assert!(header.device_major().unwrap().is_some());
2061        assert!(header.device_minor().unwrap().is_some());
2062
2063        // Old-style header should return None
2064        let mut old_header = Header::new_ustar();
2065        old_header.as_mut_bytes()[257..265].fill(0);
2066        assert!(old_header.device_major().unwrap().is_none());
2067        assert!(old_header.device_minor().unwrap().is_none());
2068    }
2069
2070    #[test]
2071    fn test_debug_impls() {
2072        // Exercise Debug impls to verify they don't panic; the formatted
2073        // string itself is irrelevant.
2074        let header = Header::new_ustar();
2075        let _ = format!("{header:?}");
2076        let _ = format!("{:?}", header.as_old());
2077        let _ = format!("{:?}", header.as_ustar());
2078        let _ = format!("{:?}", header.as_gnu());
2079        let _ = format!("{:?}", GnuExtSparseHeader::default());
2080        let _ = format!("{:?}", GnuSparseHeader::default());
2081    }
2082
2083    #[test]
2084    fn test_parse_numeric() {
2085        // Octal cases (same as parse_octal)
2086        let octal_cases: &[(&[u8], u64)] = &[
2087            (b"0000644\0", 0o644),
2088            (b"0000755\0", 0o755),
2089            (b"     123 ", 0o123),
2090            (b"", 0),
2091        ];
2092        for (input, expected) in octal_cases {
2093            assert_eq!(
2094                parse_numeric(input).unwrap(),
2095                *expected,
2096                "parse_numeric({input:?})"
2097            );
2098        }
2099
2100        // Base-256 cases: high bit set, remaining bytes are big-endian value
2101        let base256_cases: &[(&[u8], u64)] = &[
2102            (&[0x80, 0x00, 0x00, 0x01], 1),
2103            (&[0x80, 0x00, 0x01, 0x00], 256),
2104            (&[0x80, 0xFF], 255),
2105            (
2106                &[
2107                    0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
2108                ],
2109                1 << 40, // 1099511627776
2110            ),
2111        ];
2112        for (input, expected) in base256_cases {
2113            assert_eq!(
2114                parse_numeric(input).unwrap(),
2115                *expected,
2116                "parse_numeric({input:?})"
2117            );
2118        }
2119    }
2120
2121    #[test]
2122    fn test_parse_numeric_base256_in_header() {
2123        // Test that base-256 encoded size field works in Header
2124        let mut header = Header::new_ustar();
2125
2126        // Set size field (bytes 124..136) to base-256 encoded value
2127        // 12-byte field: first byte has 0x80 marker, remaining 11 bytes are the value
2128        // We want to encode a large value that wouldn't fit in octal
2129        let size_field = &mut header.as_mut_bytes()[124..136];
2130        size_field.fill(0);
2131        size_field[0] = 0x80; // base-256 marker (first byte & 0x7f = 0)
2132                              // Put value in last 4 bytes for simplicity: 0x12345678
2133        size_field[8] = 0x12;
2134        size_field[9] = 0x34;
2135        size_field[10] = 0x56;
2136        size_field[11] = 0x78;
2137
2138        assert_eq!(header.entry_size().unwrap(), 0x12345678);
2139    }
2140
2141    #[test]
2142    fn test_parse_numeric_base256_uid_gid() {
2143        let mut header = Header::new_ustar();
2144
2145        // Set uid field (bytes 108..116) to base-256 encoded value
2146        let uid_field = &mut header.as_mut_bytes()[108..116];
2147        uid_field.fill(0);
2148        uid_field[0] = 0x80; // base-256 marker
2149        uid_field[7] = 0x42; // value = 66
2150        assert_eq!(header.uid().unwrap(), 66);
2151
2152        // Set gid field (bytes 116..124) to base-256 encoded value
2153        let gid_field = &mut header.as_mut_bytes()[116..124];
2154        gid_field.fill(0);
2155        gid_field[0] = 0x80; // base-256 marker
2156        gid_field[6] = 0x01;
2157        gid_field[7] = 0x00; // value = 256
2158        assert_eq!(header.gid().unwrap(), 256);
2159    }
2160
2161    #[test]
2162    fn test_from_bytes() {
2163        let mut data = [0u8; 512];
2164        // Set up a valid UStar header
2165        data[257..263].copy_from_slice(USTAR_MAGIC);
2166        data[263..265].copy_from_slice(USTAR_VERSION);
2167        data[0..4].copy_from_slice(b"test");
2168
2169        let header = Header::from_bytes(&data);
2170        assert!(header.is_ustar());
2171        assert_eq!(header.path_bytes(), b"test");
2172    }
2173
2174    #[test]
2175    fn test_from_bytes_gnu() {
2176        let mut data = [0u8; 512];
2177        data[257..263].copy_from_slice(GNU_MAGIC);
2178        data[263..265].copy_from_slice(GNU_VERSION);
2179
2180        let header = Header::from_bytes(&data);
2181        assert!(header.is_gnu());
2182        assert!(!header.is_ustar());
2183    }
2184
2185    // =========================================================================
2186    // PAX Extension Tests
2187    // =========================================================================
2188
2189    #[test]
2190    fn test_pax_simple() {
2191        let data = b"20 path=foo/bar.txt\n";
2192        let mut iter = PaxExtensions::new(data);
2193        let ext = iter.next().unwrap().unwrap();
2194        assert_eq!(ext.key().unwrap(), "path");
2195        assert_eq!(ext.value().unwrap(), "foo/bar.txt");
2196        assert!(iter.next().is_none());
2197    }
2198
2199    #[test]
2200    fn test_pax_multiple() {
2201        let data = b"20 path=foo/bar.txt\n12 uid=1000\n12 gid=1000\n";
2202        let exts: Vec<_> = PaxExtensions::new(data).collect();
2203        assert_eq!(exts.len(), 3);
2204        assert_eq!(exts[0].as_ref().unwrap().key().unwrap(), "path");
2205        assert_eq!(exts[0].as_ref().unwrap().value().unwrap(), "foo/bar.txt");
2206        assert_eq!(exts[1].as_ref().unwrap().key().unwrap(), "uid");
2207        assert_eq!(exts[1].as_ref().unwrap().value().unwrap(), "1000");
2208        assert_eq!(exts[2].as_ref().unwrap().key().unwrap(), "gid");
2209        assert_eq!(exts[2].as_ref().unwrap().value().unwrap(), "1000");
2210    }
2211
2212    #[test]
2213    fn test_pax_get() {
2214        let data = b"20 path=foo/bar.txt\n12 uid=1000\n16 size=1234567\n";
2215        let pax = PaxExtensions::new(data);
2216
2217        let str_cases: &[(&str, Option<&str>)] = &[
2218            ("path", Some("foo/bar.txt")),
2219            ("uid", Some("1000")),
2220            ("missing", None),
2221        ];
2222        for (key, expected) in str_cases {
2223            assert_eq!(pax.get(key), *expected, "get({key:?})");
2224        }
2225
2226        let u64_cases: &[(&str, Option<u64>)] = &[
2227            ("uid", Some(1000)),
2228            ("size", Some(1234567)),
2229            ("missing", None),
2230        ];
2231        for (key, expected) in u64_cases {
2232            assert_eq!(pax.get_u64(key), *expected, "get_u64({key:?})");
2233        }
2234    }
2235
2236    #[test]
2237    fn test_pax_empty() {
2238        let data = b"";
2239        let mut iter = PaxExtensions::new(data);
2240        assert!(iter.next().is_none());
2241    }
2242
2243    #[test]
2244    fn test_pax_binary_value() {
2245        // PAX values can contain binary data (e.g., xattrs)
2246        // Format: "<len> <key>=<value>\n" where len includes everything
2247        // 24 = 2 (digits) + 1 (space) + 16 (key) + 1 (=) + 3 (value) + 1 (newline)
2248        let data = b"24 SCHILY.xattr.foo=\x00\x01\x02\n";
2249        let mut iter = PaxExtensions::new(data);
2250        let ext = iter.next().unwrap().unwrap();
2251        assert_eq!(ext.key().unwrap(), "SCHILY.xattr.foo");
2252        assert_eq!(ext.value_bytes(), b"\x00\x01\x02");
2253    }
2254
2255    #[test]
2256    fn test_pax_long_path() {
2257        // Test a path that's exactly at various boundary lengths
2258        let long_path = "a".repeat(200);
2259        // PAX format: "length path=value\n" where length includes ALL bytes including itself
2260        // For 200-char path: 5 (path=) + 1 (\n) + 200 (value) + 1 (space) + 3 (length digits) = 210
2261        let record = format!("210 path={}\n", long_path);
2262        let data = record.as_bytes();
2263        let pax = PaxExtensions::new(data);
2264        assert_eq!(pax.get("path"), Some(long_path.as_str()));
2265    }
2266
2267    #[test]
2268    fn test_pax_unicode_path() {
2269        // PAX supports UTF-8 paths
2270        let data = "35 path=日本語/ファイル.txt\n".as_bytes();
2271        let pax = PaxExtensions::new(data);
2272        assert_eq!(pax.get("path"), Some("日本語/ファイル.txt"));
2273    }
2274
2275    #[test]
2276    fn test_pax_mtime_fractional() {
2277        // PAX mtime can have fractional seconds
2278        let data = b"22 mtime=1234567890.5\n";
2279        let pax = PaxExtensions::new(data);
2280        assert_eq!(pax.get("mtime"), Some("1234567890.5"));
2281        // get_u64 won't parse fractional
2282        assert_eq!(pax.get_u64("mtime"), None);
2283    }
2284
2285    #[test]
2286    fn test_pax_schily_xattr() {
2287        let data = b"30 SCHILY.xattr.user.test=val\n";
2288        let mut iter = PaxExtensions::new(data);
2289        let ext = iter.next().unwrap().unwrap();
2290        let key = ext.key().unwrap();
2291        assert_eq!(key.strip_prefix(PAX_SCHILY_XATTR), Some("user.test"));
2292    }
2293
2294    #[test]
2295    fn test_pax_malformed() {
2296        let cases: &[&[u8]] = &[
2297            b"15 pathfoobar\n", // no '='
2298            b"100 path=foo\n",  // length exceeds record
2299        ];
2300        for bad in cases {
2301            let result = PaxExtensions::new(bad).next().unwrap();
2302            assert!(result.is_err(), "should reject {bad:?}");
2303        }
2304    }
2305
2306    // =========================================================================
2307    // Edge Case Tests
2308    // =========================================================================
2309
2310    #[test]
2311    fn test_path_exactly_100_bytes() {
2312        // Path that fills entire name field (no null terminator needed)
2313        let mut header = Header::new_ustar();
2314        let path = "a".repeat(100);
2315        header.as_mut_bytes()[0..100].copy_from_slice(path.as_bytes());
2316
2317        assert_eq!(header.path_bytes().len(), 100);
2318        assert_eq!(header.path_bytes(), path.as_bytes());
2319    }
2320
2321    #[test]
2322    fn test_link_name_exactly_100_bytes() {
2323        let mut header = Header::new_ustar();
2324        let target = "t".repeat(100);
2325        header.as_mut_bytes()[157..257].copy_from_slice(target.as_bytes());
2326
2327        assert_eq!(header.link_name_bytes().len(), 100);
2328        assert_eq!(header.link_name_bytes(), target.as_bytes());
2329    }
2330
2331    #[test]
2332    fn test_prefix_exactly_155_bytes() {
2333        let mut header = Header::new_ustar();
2334        let prefix = "p".repeat(155);
2335        header.as_mut_bytes()[345..500].copy_from_slice(prefix.as_bytes());
2336
2337        assert_eq!(header.prefix().unwrap().len(), 155);
2338        assert_eq!(header.prefix().unwrap(), prefix.as_bytes());
2339    }
2340
2341    #[test]
2342    fn test_sparse_header_parsing() {
2343        let header = Header::new_gnu();
2344        let gnu = header.as_gnu();
2345
2346        // Default sparse headers should have zero offset and numbytes
2347        for sparse in &gnu.sparse {
2348            assert_eq!(parse_octal(&sparse.offset).unwrap(), 0);
2349            assert_eq!(parse_octal(&sparse.numbytes).unwrap(), 0);
2350        }
2351    }
2352
2353    #[test]
2354    fn test_gnu_atime_ctime() {
2355        let mut header = Header::new_gnu();
2356        let gnu = header.as_gnu();
2357
2358        // Default should be zeros
2359        assert_eq!(parse_octal(&gnu.atime).unwrap(), 0);
2360        assert_eq!(parse_octal(&gnu.ctime).unwrap(), 0);
2361
2362        // Set some values (valid octal: 12345670123)
2363        header.as_mut_bytes()[345..356].copy_from_slice(b"12345670123");
2364        let gnu = header.as_gnu();
2365        assert_eq!(parse_octal(&gnu.atime).unwrap(), 0o12345670123);
2366    }
2367
2368    #[test]
2369    fn test_ext_sparse_header() {
2370        let ext = GnuExtSparseHeader::default();
2371        assert_eq!(ext.isextended[0], 0);
2372        assert_eq!(ext.sparse.len(), 21);
2373
2374        // Verify size is exactly 512 bytes
2375        assert_eq!(size_of::<GnuExtSparseHeader>(), HEADER_SIZE);
2376    }
2377
2378    #[test]
2379    fn test_base256_max_values() {
2380        // Large UID that needs base-256
2381        let mut bytes = [0u8; 8];
2382        bytes[0] = 0x80; // marker
2383        bytes[4] = 0xFF;
2384        bytes[5] = 0xFF;
2385        bytes[6] = 0xFF;
2386        bytes[7] = 0xFF;
2387        assert_eq!(parse_numeric(&bytes).unwrap(), 0xFFFFFFFF);
2388    }
2389
2390    // =========================================================================
2391    // Tests for encode_numeric and setter methods
2392    // =========================================================================
2393
2394    #[test]
2395    fn test_encode_numeric_roundtrip() {
2396        fn check<const N: usize>(value: u64, expect_b256: bool) {
2397            let mut field = [0u8; N];
2398            encode_numeric(&mut field, value).unwrap();
2399            assert_eq!(
2400                field[0] & 0x80 != 0,
2401                expect_b256,
2402                "base256 flag for {value} in {N}-byte field"
2403            );
2404            assert_eq!(
2405                parse_numeric(&field).unwrap(),
2406                value,
2407                "roundtrip {value} in {N}-byte field"
2408            );
2409        }
2410
2411        // 12-byte field: octal range
2412        check::<12>(0, false);
2413        check::<12>(0o644, false);
2414        check::<12>(0o77777777777, false);
2415        // 12-byte field: base-256 (>= 8GB threshold)
2416        check::<12>(8_589_934_592, true);
2417        check::<12>(0x1234_5678_90AB_CDEF, true);
2418        // 8-byte field (uid/gid): octal range
2419        check::<8>(0, false);
2420        check::<8>(2_097_151, false); // just below threshold
2421                                      // 8-byte field: base-256 (>= 2^21 threshold)
2422        check::<8>(2_097_152, true);
2423    }
2424
2425    #[test]
2426    fn test_header_format_detection() {
2427        // (header, is_ustar, is_gnu)
2428        let cases: &[(Header, bool, bool)] = &[
2429            (Header::new_ustar(), true, false),
2430            (Header::new_gnu(), false, true),
2431            (Header::new_old(), false, false),
2432        ];
2433        for (header, ustar, gnu) in cases {
2434            assert_eq!(header.is_ustar(), *ustar, "{header:?}");
2435            assert_eq!(header.is_gnu(), *gnu, "{header:?}");
2436            assert_eq!(header.try_as_ustar().is_some(), *ustar);
2437            assert_eq!(header.try_as_gnu().is_some(), *gnu);
2438        }
2439    }
2440
2441    #[test]
2442    fn test_header_mutable_views() {
2443        let mut header = Header::new_ustar();
2444
2445        // Test mutable views exist and work
2446        let _old = header.as_old_mut();
2447        let _ustar = header.as_ustar_mut();
2448        let _gnu = header.as_gnu_mut();
2449
2450        // Test try_as_*_mut
2451        let mut ustar_header = Header::new_ustar();
2452        assert!(ustar_header.try_as_ustar_mut().is_some());
2453        assert!(ustar_header.try_as_gnu_mut().is_none());
2454    }
2455
2456    #[test]
2457    fn test_header_setters() {
2458        let mut header = Header::new_ustar();
2459
2460        // Fallible numeric field setters: (set, get, value)
2461        type NumericCase = (
2462            fn(&mut Header, u64) -> Result<()>,
2463            fn(&Header) -> Result<u64>,
2464            u64,
2465        );
2466        let numeric_cases: &[NumericCase] = &[
2467            (|h, v| h.set_size(v), |h| h.entry_size(), 1024),
2468            (|h, v| h.set_uid(v), |h| h.uid(), 1000),
2469            (|h, v| h.set_gid(v), |h| h.gid(), 1000),
2470            (|h, v| h.set_mtime(v), |h| h.mtime(), 1234567890),
2471        ];
2472        for (set, get, value) in numeric_cases {
2473            set(&mut header, *value).unwrap();
2474            assert_eq!(get(&header).unwrap(), *value, "roundtrip {value}");
2475        }
2476
2477        header.set_mode(0o755).unwrap();
2478        assert_eq!(header.mode().unwrap(), 0o755);
2479
2480        header.set_entry_type(EntryType::Directory);
2481        assert_eq!(header.entry_type(), EntryType::Directory);
2482
2483        header.set_path(b"test.txt").unwrap();
2484        assert_eq!(header.path_bytes(), b"test.txt");
2485
2486        header.set_link_name(b"target").unwrap();
2487        assert_eq!(header.link_name_bytes(), b"target");
2488
2489        header.set_checksum();
2490        header.verify_checksum().unwrap();
2491    }
2492
2493    #[test]
2494    fn test_format_aware_encoding() {
2495        let large_uid: u64 = 0xFFFF_FFFF; // exceeds 8-byte octal max (2097151)
2496        let large_size: u64 = 10_000_000_000; // exceeds 12-byte octal max (8589934591)
2497
2498        // GNU headers accept large values via base-256.
2499        let mut gnu = Header::new_gnu();
2500        gnu.set_uid(large_uid).unwrap();
2501        assert_eq!(gnu.uid().unwrap(), large_uid);
2502        gnu.set_size(large_size).unwrap();
2503        assert_eq!(gnu.entry_size().unwrap(), large_size);
2504
2505        // UStar headers reject values that exceed octal capacity.
2506        let mut ustar = Header::new_ustar();
2507        assert!(ustar.set_uid(large_uid).is_err());
2508        assert!(ustar.set_size(large_size).is_err());
2509
2510        // UStar headers accept values within octal range.
2511        ustar.set_uid(1000).unwrap();
2512        ustar.set_size(1024).unwrap();
2513    }
2514
2515    #[test]
2516    fn test_gnu_header_atime_ctime_setters() {
2517        let mut header = Header::new_gnu();
2518        let gnu = header.as_gnu_mut();
2519
2520        gnu.set_atime(1234567890);
2521        assert_eq!(gnu.atime().unwrap(), 1234567890);
2522
2523        gnu.set_ctime(1234567891);
2524        assert_eq!(gnu.ctime().unwrap(), 1234567891);
2525    }
2526
2527    #[test]
2528    fn test_gnu_header_real_size() {
2529        let mut header = Header::new_gnu();
2530        let gnu = header.as_gnu_mut();
2531
2532        gnu.set_real_size(1_000_000);
2533        assert_eq!(gnu.real_size().unwrap(), 1_000_000);
2534
2535        // Large value
2536        gnu.set_real_size(10_000_000_000);
2537        assert_eq!(gnu.real_size().unwrap(), 10_000_000_000);
2538    }
2539
2540    #[test]
2541    fn test_gnu_header_is_extended() {
2542        let mut header = Header::new_gnu();
2543        let gnu = header.as_gnu_mut();
2544
2545        assert!(!gnu.is_extended());
2546        gnu.set_is_extended(true);
2547        assert!(gnu.is_extended());
2548        gnu.set_is_extended(false);
2549        assert!(!gnu.is_extended());
2550    }
2551
2552    /// Cross-checking tests against the `tar` crate using proptest.
2553    mod proptest_tests {
2554        use super::*;
2555        use proptest::prelude::*;
2556        use std::io::Cursor;
2557
2558        /// Tar header format to test. Proptest generates both variants so
2559        /// each property is checked against UStar and GNU automatically.
2560        #[derive(Debug, Clone, Copy)]
2561        enum TarFormat {
2562            Ustar,
2563            Gnu,
2564        }
2565
2566        fn tar_format_strategy() -> impl Strategy<Value = TarFormat> {
2567            prop_oneof![Just(TarFormat::Ustar), Just(TarFormat::Gnu)]
2568        }
2569
2570        impl TarFormat {
2571            fn header_builder(self) -> crate::builder::HeaderBuilder {
2572                match self {
2573                    TarFormat::Ustar => crate::builder::HeaderBuilder::new_ustar(),
2574                    TarFormat::Gnu => crate::builder::HeaderBuilder::new_gnu(),
2575                }
2576            }
2577
2578            fn tar_rs_header(self) -> tar::Header {
2579                match self {
2580                    TarFormat::Ustar => tar::Header::new_ustar(),
2581                    TarFormat::Gnu => tar::Header::new_gnu(),
2582                }
2583            }
2584
2585            fn our_header(self) -> Header {
2586                match self {
2587                    TarFormat::Ustar => Header::new_ustar(),
2588                    TarFormat::Gnu => Header::new_gnu(),
2589                }
2590            }
2591        }
2592
2593        /// Copy a tar-rs header into a `[u8; 512]`.
2594        fn tar_rs_bytes(header: &tar::Header) -> [u8; 512] {
2595            *header.as_bytes()
2596        }
2597
2598        /// Format header bytes as labeled fields for readable diffs.
2599        fn header_hex(bytes: &[u8; 512]) -> String {
2600            let fields: &[(&str, std::ops::Range<usize>)] = &[
2601                ("name", 0..100),
2602                ("mode", 100..108),
2603                ("uid", 108..116),
2604                ("gid", 116..124),
2605                ("size", 124..136),
2606                ("mtime", 136..148),
2607                ("checksum", 148..156),
2608                ("typeflag", 156..157),
2609                ("linkname", 157..257),
2610                ("magic", 257..263),
2611                ("version", 263..265),
2612                ("uname", 265..297),
2613                ("gname", 297..329),
2614                ("devmajor", 329..337),
2615                ("devminor", 337..345),
2616                ("prefix", 345..500),
2617                ("padding", 500..512),
2618            ];
2619            let mut out = String::new();
2620            for (name, range) in fields {
2621                let slice = &bytes[range.clone()];
2622                if slice.iter().all(|&b| b == 0) {
2623                    continue;
2624                }
2625                use std::fmt::Write;
2626                write!(out, "{name:>10}: ").unwrap();
2627                for &b in slice {
2628                    if b.is_ascii_graphic() || b == b' ' {
2629                        out.push(b as char);
2630                    } else {
2631                        write!(out, "\\x{b:02x}").unwrap();
2632                    }
2633                }
2634                out.push('\n');
2635            }
2636            out
2637        }
2638
2639        fn assert_headers_eq(ours: &[u8; 512], theirs: &[u8; 512]) {
2640            if ours != theirs {
2641                similar_asserts::assert_eq!(header_hex(ours), header_hex(theirs));
2642            }
2643        }
2644
2645        /// Strategy for generating valid file paths (ASCII, no null bytes, reasonable length).
2646        fn path_strategy() -> impl Strategy<Value = String> {
2647            proptest::string::string_regex(
2648                "[a-zA-Z0-9_][a-zA-Z0-9_.+-]*(/[a-zA-Z0-9_][a-zA-Z0-9_.+-]*)*",
2649            )
2650            .expect("valid regex")
2651            .prop_filter("reasonable length", |s| !s.is_empty() && s.len() < 100)
2652        }
2653
2654        /// Strategy for generating valid link targets.
2655        /// Avoids consecutive slashes and `.`/`..` segments which the tar crate normalizes.
2656        fn link_target_strategy() -> impl Strategy<Value = String> {
2657            proptest::string::string_regex(
2658                "[a-zA-Z0-9_][a-zA-Z0-9_+-]*(/[a-zA-Z0-9_][a-zA-Z0-9_+-]*)*",
2659            )
2660            .expect("valid regex")
2661            .prop_filter("reasonable length", |s| !s.is_empty() && s.len() < 100)
2662        }
2663
2664        /// Strategy for generating valid user/group names.
2665        fn name_strategy() -> impl Strategy<Value = String> {
2666            proptest::string::string_regex("[a-zA-Z_][a-zA-Z0-9_]{0,30}").expect("valid regex")
2667        }
2668
2669        /// Strategy for file mode (valid Unix permissions).
2670        fn mode_strategy() -> impl Strategy<Value = u32> {
2671            // Standard Unix permission modes
2672            prop_oneof![
2673                Just(0o644),    // regular file
2674                Just(0o755),    // executable
2675                Just(0o600),    // private
2676                Just(0o777),    // all permissions
2677                Just(0o400),    // read-only
2678                (0u32..0o7777), // any valid mode
2679            ]
2680        }
2681
2682        /// Strategy for uid/gid values that fit in octal.
2683        fn id_strategy() -> impl Strategy<Value = u64> {
2684            prop_oneof![
2685                Just(0u64),
2686                Just(1000u64),
2687                Just(65534u64),    // nobody
2688                (0u64..0o7777777), // fits in 7 octal digits
2689            ]
2690        }
2691
2692        /// Strategy for mtime values.
2693        fn mtime_strategy() -> impl Strategy<Value = u64> {
2694            prop_oneof![
2695                Just(0u64),
2696                Just(1234567890u64),
2697                (0u64..0o77777777777u64), // fits in 11 octal digits
2698            ]
2699        }
2700
2701        /// Strategy for file size values.
2702        fn size_strategy() -> impl Strategy<Value = u64> {
2703            prop_oneof![
2704                Just(0u64),
2705                Just(1u64),
2706                Just(512u64),
2707                Just(4096u64),
2708                (0u64..1024 * 1024), // up to 1 MB
2709            ]
2710        }
2711
2712        /// Test parameters for a regular file entry.
2713        #[derive(Debug, Clone)]
2714        struct FileParams {
2715            path: String,
2716            mode: u32,
2717            uid: u64,
2718            gid: u64,
2719            mtime: u64,
2720            size: u64,
2721            username: String,
2722            groupname: String,
2723        }
2724
2725        fn file_params_strategy() -> impl Strategy<Value = FileParams> {
2726            (
2727                path_strategy(),
2728                mode_strategy(),
2729                id_strategy(),
2730                id_strategy(),
2731                mtime_strategy(),
2732                size_strategy(),
2733                name_strategy(),
2734                name_strategy(),
2735            )
2736                .prop_map(
2737                    |(path, mode, uid, gid, mtime, size, username, groupname)| FileParams {
2738                        path,
2739                        mode,
2740                        uid,
2741                        gid,
2742                        mtime,
2743                        size,
2744                        username,
2745                        groupname,
2746                    },
2747                )
2748        }
2749
2750        /// Test parameters for a symlink entry.
2751        #[derive(Debug, Clone)]
2752        struct SymlinkParams {
2753            path: String,
2754            target: String,
2755            uid: u64,
2756            gid: u64,
2757            mtime: u64,
2758        }
2759
2760        fn symlink_params_strategy() -> impl Strategy<Value = SymlinkParams> {
2761            (
2762                path_strategy(),
2763                link_target_strategy(),
2764                id_strategy(),
2765                id_strategy(),
2766                mtime_strategy(),
2767            )
2768                .prop_map(|(path, target, uid, gid, mtime)| SymlinkParams {
2769                    path,
2770                    target,
2771                    uid,
2772                    gid,
2773                    mtime,
2774                })
2775        }
2776
2777        /// Test parameters for a directory entry.
2778        #[derive(Debug, Clone)]
2779        struct DirParams {
2780            path: String,
2781            mode: u32,
2782            uid: u64,
2783            gid: u64,
2784            mtime: u64,
2785        }
2786
2787        fn dir_params_strategy() -> impl Strategy<Value = DirParams> {
2788            (
2789                path_strategy(),
2790                mode_strategy(),
2791                id_strategy(),
2792                id_strategy(),
2793                mtime_strategy(),
2794            )
2795                .prop_map(|(path, mode, uid, gid, mtime)| DirParams {
2796                    path,
2797                    mode,
2798                    uid,
2799                    gid,
2800                    mtime,
2801                })
2802        }
2803
2804        /// Create a tar archive with a single file entry and return the bytes.
2805        fn create_file_tar(params: &FileParams, fmt: TarFormat) -> Vec<u8> {
2806            let mut builder = tar::Builder::new(Vec::new());
2807
2808            let mut header = fmt.tar_rs_header();
2809            header.set_path(&params.path).unwrap();
2810            header.set_mode(params.mode);
2811            header.set_uid(params.uid);
2812            header.set_gid(params.gid);
2813            header.set_mtime(params.mtime);
2814            header.set_size(params.size);
2815            header.set_entry_type(tar::EntryType::Regular);
2816            header.set_username(&params.username).unwrap();
2817            header.set_groupname(&params.groupname).unwrap();
2818            header.set_cksum();
2819
2820            let content = vec![0u8; params.size as usize];
2821            builder
2822                .append_data(&mut header, &params.path, content.as_slice())
2823                .unwrap();
2824
2825            builder.into_inner().unwrap()
2826        }
2827
2828        /// Create a tar archive with a symlink entry and return the bytes.
2829        fn create_symlink_tar(params: &SymlinkParams, fmt: TarFormat) -> Vec<u8> {
2830            let mut builder = tar::Builder::new(Vec::new());
2831
2832            let mut header = fmt.tar_rs_header();
2833            header.set_path(&params.path).unwrap();
2834            header.set_mode(0o777);
2835            header.set_uid(params.uid);
2836            header.set_gid(params.gid);
2837            header.set_mtime(params.mtime);
2838            header.set_size(0);
2839            header.set_entry_type(tar::EntryType::Symlink);
2840            header.set_link_name(&params.target).unwrap();
2841            header.set_cksum();
2842
2843            builder
2844                .append_data(&mut header, &params.path, std::io::empty())
2845                .unwrap();
2846
2847            builder.into_inner().unwrap()
2848        }
2849
2850        /// Create a tar archive with a directory entry and return the bytes.
2851        fn create_dir_tar(params: &DirParams, fmt: TarFormat) -> Vec<u8> {
2852            let mut builder = tar::Builder::new(Vec::new());
2853
2854            let mut header = fmt.tar_rs_header();
2855            let path = if params.path.ends_with('/') {
2856                params.path.clone()
2857            } else {
2858                format!("{}/", params.path)
2859            };
2860            header.set_path(&path).unwrap();
2861            header.set_mode(params.mode);
2862            header.set_uid(params.uid);
2863            header.set_gid(params.gid);
2864            header.set_mtime(params.mtime);
2865            header.set_size(0);
2866            header.set_entry_type(tar::EntryType::Directory);
2867            header.set_cksum();
2868
2869            builder
2870                .append_data(&mut header, &path, std::io::empty())
2871                .unwrap();
2872
2873            builder.into_inner().unwrap()
2874        }
2875
2876        /// Extract the first 512-byte header from a tar archive.
2877        fn extract_header_bytes(tar_data: &[u8]) -> [u8; 512] {
2878            tar_data[..512].try_into().unwrap()
2879        }
2880
2881        /// Compare our Header parsing against tar crate's parsing of the same
2882        /// bytes. Both parsers are reading identical data, so any disagreement
2883        /// is a bug in one of them.
2884        fn compare_headers(
2885            our_header: &Header,
2886            tar_header: &tar::Header,
2887        ) -> std::result::Result<(), TestCaseError> {
2888            // Entry type: compare the raw byte since both sides read from the
2889            // same header bytes.
2890            prop_assert_eq!(
2891                our_header.entry_type().to_byte(),
2892                tar_header.entry_type().as_byte(),
2893                "entry type mismatch"
2894            );
2895
2896            prop_assert_eq!(
2897                our_header.entry_size().unwrap(),
2898                tar_header.size().unwrap(),
2899                "size mismatch"
2900            );
2901            prop_assert_eq!(
2902                our_header.mode().unwrap(),
2903                tar_header.mode().unwrap(),
2904                "mode mismatch"
2905            );
2906            prop_assert_eq!(
2907                our_header.uid().unwrap(),
2908                tar_header.uid().unwrap(),
2909                "uid mismatch"
2910            );
2911            prop_assert_eq!(
2912                our_header.gid().unwrap(),
2913                tar_header.gid().unwrap(),
2914                "gid mismatch"
2915            );
2916            prop_assert_eq!(
2917                our_header.mtime().unwrap(),
2918                tar_header.mtime().unwrap(),
2919                "mtime mismatch"
2920            );
2921
2922            let tar_path = tar_header.path_bytes();
2923            prop_assert_eq!(our_header.path_bytes(), tar_path.as_ref(), "path mismatch");
2924
2925            let our_link = our_header.link_name_bytes();
2926            if let Some(tar_link) = tar_header.link_name_bytes() {
2927                prop_assert_eq!(our_link, tar_link.as_ref(), "link_name mismatch");
2928            } else {
2929                prop_assert!(our_link.is_empty(), "expected empty link name");
2930            }
2931
2932            if let Some(our_username) = our_header.username() {
2933                if let Some(tar_username) = tar_header.username_bytes() {
2934                    prop_assert_eq!(our_username, tar_username, "username mismatch");
2935                }
2936            }
2937
2938            if let Some(our_groupname) = our_header.groupname() {
2939                if let Some(tar_groupname) = tar_header.groupname_bytes() {
2940                    prop_assert_eq!(our_groupname, tar_groupname, "groupname mismatch");
2941                }
2942            }
2943
2944            our_header.verify_checksum().unwrap();
2945
2946            Ok(())
2947        }
2948
2949        proptest! {
2950            #![proptest_config(ProptestConfig::with_cases(256))]
2951
2952            #[test]
2953            fn test_file_header_crosscheck(
2954                params in file_params_strategy(),
2955                fmt in tar_format_strategy(),
2956            ) {
2957                let tar_data = create_file_tar(&params, fmt);
2958                let header_bytes = extract_header_bytes(&tar_data);
2959
2960                let our_header = Header::from_bytes(&header_bytes);
2961                let tar_header = tar::Header::from_byte_slice(&header_bytes);
2962
2963                compare_headers(our_header, tar_header)?;
2964
2965                prop_assert!(our_header.entry_type().is_file());
2966                prop_assert_eq!(our_header.entry_size().unwrap(), params.size);
2967
2968                if matches!(fmt, TarFormat::Gnu) {
2969                    prop_assert!(our_header.is_gnu());
2970                    prop_assert!(!our_header.is_ustar());
2971                }
2972            }
2973
2974            #[test]
2975            fn test_symlink_header_crosscheck(
2976                params in symlink_params_strategy(),
2977                fmt in tar_format_strategy(),
2978            ) {
2979                let tar_data = create_symlink_tar(&params, fmt);
2980                let header_bytes = extract_header_bytes(&tar_data);
2981
2982                let our_header = Header::from_bytes(&header_bytes);
2983                let tar_header = tar::Header::from_byte_slice(&header_bytes);
2984
2985                compare_headers(our_header, tar_header)?;
2986
2987                prop_assert!(our_header.entry_type().is_symlink());
2988                prop_assert_eq!(our_header.link_name_bytes(), params.target.as_bytes());
2989
2990                if matches!(fmt, TarFormat::Gnu) {
2991                    prop_assert!(our_header.is_gnu());
2992                }
2993            }
2994
2995            #[test]
2996            fn test_dir_header_crosscheck(
2997                params in dir_params_strategy(),
2998                fmt in tar_format_strategy(),
2999            ) {
3000                let tar_data = create_dir_tar(&params, fmt);
3001                let header_bytes = extract_header_bytes(&tar_data);
3002
3003                let our_header = Header::from_bytes(&header_bytes);
3004                let tar_header = tar::Header::from_byte_slice(&header_bytes);
3005
3006                compare_headers(our_header, tar_header)?;
3007
3008                prop_assert!(our_header.entry_type().is_dir());
3009
3010                if matches!(fmt, TarFormat::Gnu) {
3011                    prop_assert!(our_header.is_gnu());
3012                }
3013            }
3014        }
3015
3016        /// Test reading entries from real tar archives created by the tar crate.
3017        mod archive_tests {
3018            use super::*;
3019
3020            proptest! {
3021                #![proptest_config(ProptestConfig::with_cases(64))]
3022
3023                #[test]
3024                fn test_multi_entry_archive(
3025                    files in prop::collection::vec(file_params_strategy(), 1..8),
3026                    dirs in prop::collection::vec(dir_params_strategy(), 0..4),
3027                ) {
3028                    // Build an archive with multiple entries
3029                    let mut builder = tar::Builder::new(Vec::new());
3030
3031                    // Add directories first
3032                    for params in &dirs {
3033                        let mut header = tar::Header::new_ustar();
3034                        let path = if params.path.ends_with('/') {
3035                            params.path.clone()
3036                        } else {
3037                            format!("{}/", params.path)
3038                        };
3039                        header.set_path(&path).unwrap();
3040                        header.set_mode(params.mode);
3041                        header.set_uid(params.uid);
3042                        header.set_gid(params.gid);
3043                        header.set_mtime(params.mtime);
3044                        header.set_size(0);
3045                        header.set_entry_type(tar::EntryType::Directory);
3046                        header.set_cksum();
3047                        builder.append_data(&mut header, &path, std::io::empty()).unwrap();
3048                    }
3049
3050                    // Add files
3051                    for params in &files {
3052                        let mut header = tar::Header::new_ustar();
3053                        header.set_path(&params.path).unwrap();
3054                        header.set_mode(params.mode);
3055                        header.set_uid(params.uid);
3056                        header.set_gid(params.gid);
3057                        header.set_mtime(params.mtime);
3058                        header.set_size(params.size);
3059                        header.set_entry_type(tar::EntryType::Regular);
3060                        header.set_username(&params.username).unwrap();
3061                        header.set_groupname(&params.groupname).unwrap();
3062                        header.set_cksum();
3063
3064                        let content = vec![0u8; params.size as usize];
3065                        builder.append_data(&mut header, &params.path, content.as_slice()).unwrap();
3066                    }
3067
3068                    let tar_data = builder.into_inner().unwrap();
3069
3070                    // Now iterate through the archive and verify each header
3071                    let mut archive = tar::Archive::new(Cursor::new(&tar_data));
3072                    let entries = archive.entries().unwrap();
3073
3074                    for entry_result in entries {
3075                        let entry = entry_result.unwrap();
3076                        let tar_header = entry.header();
3077
3078                        // Get the raw header bytes from the archive
3079                        let our_header = Header::from_bytes(tar_header.as_bytes());
3080
3081                        compare_headers(our_header, tar_header)?;
3082                    }
3083                }
3084            }
3085        }
3086
3087        /// Test format detection (UStar vs GNU vs Old).
3088        mod format_detection_tests {
3089            use super::*;
3090
3091            proptest! {
3092                #![proptest_config(ProptestConfig::with_cases(128))]
3093
3094                #[test]
3095                fn test_ustar_format_detected(params in file_params_strategy()) {
3096                    let tar_data = create_file_tar(&params, TarFormat::Ustar);
3097                    let header_bytes = extract_header_bytes(&tar_data);
3098
3099                    let our_header = Header::from_bytes(&header_bytes);
3100
3101                    prop_assert!(our_header.is_ustar(), "should be UStar");
3102                    prop_assert!(!our_header.is_gnu(), "should not be GNU");
3103
3104                    prop_assert_eq!(&header_bytes[257..263], USTAR_MAGIC);
3105                    prop_assert_eq!(&header_bytes[263..265], USTAR_VERSION);
3106                }
3107
3108                #[test]
3109                fn test_gnu_format_detected(params in file_params_strategy()) {
3110                    let tar_data = create_file_tar(&params, TarFormat::Gnu);
3111                    let header_bytes = extract_header_bytes(&tar_data);
3112
3113                    let our_header = Header::from_bytes(&header_bytes);
3114
3115                    prop_assert!(our_header.is_gnu(), "should be GNU");
3116                    prop_assert!(!our_header.is_ustar(), "should not be UStar");
3117
3118                    prop_assert_eq!(&header_bytes[257..263], GNU_MAGIC);
3119                    prop_assert_eq!(&header_bytes[263..265], GNU_VERSION);
3120                }
3121            }
3122
3123            #[test]
3124            fn test_old_format_detection() {
3125                // Create a header with no magic (old format)
3126                let mut header_bytes = [0u8; 512];
3127
3128                // Set a simple file name
3129                header_bytes[0..4].copy_from_slice(b"test");
3130
3131                // Set mode (octal)
3132                header_bytes[100..107].copy_from_slice(b"0000644");
3133
3134                // Set size = 0
3135                header_bytes[124..135].copy_from_slice(b"00000000000");
3136
3137                // Set typeflag = regular file
3138                header_bytes[156] = b'0';
3139
3140                // Compute and set checksum
3141                let mut checksum: u64 = 0;
3142                for (i, &byte) in header_bytes.iter().enumerate() {
3143                    if (148..156).contains(&i) {
3144                        checksum += u64::from(b' ');
3145                    } else {
3146                        checksum += u64::from(byte);
3147                    }
3148                }
3149                let checksum_str = format!("{checksum:06o}\0 ");
3150                header_bytes[148..156].copy_from_slice(checksum_str.as_bytes());
3151
3152                let our_header = Header::from_bytes(&header_bytes);
3153
3154                // Old format: neither UStar nor GNU
3155                assert!(!our_header.is_ustar());
3156                assert!(!our_header.is_gnu());
3157
3158                // But we can still parse basic fields
3159                assert_eq!(our_header.path_bytes(), b"test");
3160                assert_eq!(our_header.entry_type(), EntryType::Regular);
3161            }
3162        }
3163
3164        /// Test checksum computation matches tar crate.
3165        mod checksum_tests {
3166            use super::*;
3167
3168            proptest! {
3169                #![proptest_config(ProptestConfig::with_cases(256))]
3170
3171                #[test]
3172                fn test_checksum_always_valid(
3173                    params in file_params_strategy(),
3174                    fmt in tar_format_strategy(),
3175                ) {
3176                    let tar_data = create_file_tar(&params, fmt);
3177                    let header_bytes = extract_header_bytes(&tar_data);
3178
3179                    let our_header = Header::from_bytes(&header_bytes);
3180                    our_header.verify_checksum().unwrap();
3181                }
3182
3183                #[test]
3184                fn test_checksum_recompute(
3185                    params in file_params_strategy(),
3186                    fmt in tar_format_strategy(),
3187                ) {
3188                    let tar_data = create_file_tar(&params, fmt);
3189                    let header_bytes = extract_header_bytes(&tar_data);
3190
3191                    let our_header = Header::from_bytes(&header_bytes);
3192
3193                    // Our computed checksum should match
3194                    let computed = our_header.compute_checksum();
3195                    let stored = parse_octal(&header_bytes[148..156]).unwrap();
3196
3197                    prop_assert_eq!(computed, stored);
3198                }
3199            }
3200        }
3201
3202        /// Test entry type mapping is complete.
3203        mod entry_type_tests {
3204            use super::*;
3205
3206            #[test]
3207            fn test_all_entry_types_map_correctly() {
3208                // Test all known entry type bytes
3209                let mappings: &[(u8, EntryType, tar::EntryType)] = &[
3210                    (b'0', EntryType::Regular, tar::EntryType::Regular),
3211                    (b'\0', EntryType::Regular, tar::EntryType::Regular),
3212                    (b'1', EntryType::Link, tar::EntryType::Link),
3213                    (b'2', EntryType::Symlink, tar::EntryType::Symlink),
3214                    (b'3', EntryType::Char, tar::EntryType::Char),
3215                    (b'4', EntryType::Block, tar::EntryType::Block),
3216                    (b'5', EntryType::Directory, tar::EntryType::Directory),
3217                    (b'6', EntryType::Fifo, tar::EntryType::Fifo),
3218                    (b'7', EntryType::Continuous, tar::EntryType::Continuous),
3219                    (b'L', EntryType::GnuLongName, tar::EntryType::GNULongName),
3220                    (b'K', EntryType::GnuLongLink, tar::EntryType::GNULongLink),
3221                    (b'S', EntryType::GnuSparse, tar::EntryType::GNUSparse),
3222                    (b'x', EntryType::XHeader, tar::EntryType::XHeader),
3223                    (
3224                        b'g',
3225                        EntryType::XGlobalHeader,
3226                        tar::EntryType::XGlobalHeader,
3227                    ),
3228                ];
3229
3230                for &(byte, expected_ours, expected_tar) in mappings {
3231                    let ours = EntryType::from_byte(byte);
3232                    let tar_type = tar::EntryType::new(byte);
3233
3234                    assert_eq!(ours, expected_ours, "our mapping for byte {byte}");
3235                    assert_eq!(tar_type, expected_tar, "tar mapping for byte {byte}");
3236                }
3237            }
3238
3239            proptest! {
3240                #[test]
3241                fn test_entry_type_roundtrip(byte: u8) {
3242                    let our_type = EntryType::from_byte(byte);
3243                    let tar_type = tar::EntryType::new(byte);
3244
3245                    // Both should handle unknown types gracefully
3246                    let our_byte = our_type.to_byte();
3247                    let tar_byte = tar_type.as_byte();
3248
3249                    // For regular files, '\0' maps to '0'
3250                    if byte == b'\0' {
3251                        prop_assert_eq!(our_byte, b'0');
3252                    } else {
3253                        prop_assert_eq!(our_byte, tar_byte);
3254                    }
3255                }
3256            }
3257        }
3258
3259        /// Encode/decode roundtrip and panic-freedom tests for octal and
3260        /// numeric fields. These cover the properties that are too expensive
3261        /// for Kani (stdlib `from_utf8`/`from_str_radix` have unbounded loops).
3262        mod codec_tests {
3263            use super::*;
3264
3265            proptest! {
3266                #![proptest_config(ProptestConfig::with_cases(10_000))]
3267
3268                #[test]
3269                fn test_encode_octal_8_roundtrip(value in 0u64..=0o7777777) {
3270                    let mut field = [0u8; 8];
3271                    encode_octal(&mut field, value).unwrap();
3272                    prop_assert_eq!(parse_octal(&field).unwrap(), value);
3273                }
3274
3275                #[test]
3276                fn test_encode_octal_12_roundtrip(value in 0u64..=0o77777777777) {
3277                    let mut field = [0u8; 12];
3278                    encode_octal(&mut field, value).unwrap();
3279                    prop_assert_eq!(parse_octal(&field).unwrap(), value);
3280                }
3281
3282                // 8-byte base-256 has 63 data bits, so values < 2^63 roundtrip.
3283                #[test]
3284                fn test_encode_numeric_8_roundtrip(value in 0u64..=(i64::MAX as u64)) {
3285                    let mut field = [0u8; 8];
3286                    encode_numeric(&mut field, value).unwrap();
3287                    prop_assert_eq!(parse_numeric(&field).unwrap(), value);
3288                }
3289
3290                // Values >= 2^63 cannot be represented in an 8-byte base-256 field.
3291                #[test]
3292                fn test_encode_numeric_8_rejects_huge(value in (i64::MAX as u64 + 1)..=u64::MAX) {
3293                    let mut field = [0u8; 8];
3294                    prop_assert!(encode_numeric(&mut field, value).is_err());
3295                }
3296
3297                #[test]
3298                fn test_encode_numeric_12_roundtrip(value: u64) {
3299                    let mut field = [0u8; 12];
3300                    encode_numeric(&mut field, value).unwrap();
3301                    prop_assert_eq!(parse_numeric(&field).unwrap(), value);
3302                }
3303
3304                #[test]
3305                fn test_encode_octal_8_rejects_overflow(value in 0o10000000u64..=u64::MAX) {
3306                    let mut field = [0u8; 8];
3307                    prop_assert!(encode_octal(&mut field, value).is_err());
3308                }
3309
3310                #[test]
3311                fn test_encode_octal_12_rejects_overflow(value in 0o100000000000u64..=u64::MAX) {
3312                    let mut field = [0u8; 12];
3313                    prop_assert!(encode_octal(&mut field, value).is_err());
3314                }
3315
3316                #[test]
3317                fn test_parse_octal_8_no_panic(bytes in proptest::array::uniform8(0u8..)) {
3318                    let _ = parse_octal(&bytes);
3319                }
3320
3321                #[test]
3322                fn test_parse_octal_12_no_panic(bytes in proptest::array::uniform12(0u8..)) {
3323                    let _ = parse_octal(&bytes);
3324                }
3325
3326                #[test]
3327                fn test_parse_numeric_8_no_panic(bytes in proptest::array::uniform8(0u8..)) {
3328                    let _ = parse_numeric(&bytes);
3329                }
3330
3331                #[test]
3332                fn test_parse_numeric_12_no_panic(bytes in proptest::array::uniform12(0u8..)) {
3333                    let _ = parse_numeric(&bytes);
3334                }
3335            }
3336        }
3337
3338        /// Tests that verify tar-core's builder APIs produce bit-identical
3339        /// output compared to tar-rs when given the same inputs.
3340        ///
3341        /// This is critical for ensuring tar-rs can rebase on tar-core.
3342        mod builder_equivalence_tests {
3343            use super::*;
3344
3345            fn build_file_tar_core(params: &FileParams, fmt: TarFormat) -> Header {
3346                let mut b = fmt.header_builder();
3347                b.path(params.path.as_bytes())
3348                    .unwrap()
3349                    .mode(params.mode)
3350                    .unwrap()
3351                    .uid(params.uid)
3352                    .unwrap()
3353                    .gid(params.gid)
3354                    .unwrap()
3355                    .size(params.size)
3356                    .unwrap()
3357                    .mtime(params.mtime)
3358                    .unwrap()
3359                    .entry_type(EntryType::Regular)
3360                    .username(params.username.as_bytes())
3361                    .unwrap()
3362                    .groupname(params.groupname.as_bytes())
3363                    .unwrap();
3364                b.finish()
3365            }
3366
3367            fn build_file_tar_rs(params: &FileParams, fmt: TarFormat) -> [u8; 512] {
3368                let mut h = fmt.tar_rs_header();
3369                h.set_path(&params.path).unwrap();
3370                h.set_mode(params.mode);
3371                h.set_uid(params.uid);
3372                h.set_gid(params.gid);
3373                h.set_size(params.size);
3374                h.set_mtime(params.mtime);
3375                h.set_entry_type(tar::EntryType::Regular);
3376                h.set_username(&params.username).unwrap();
3377                h.set_groupname(&params.groupname).unwrap();
3378                h.set_cksum();
3379                tar_rs_bytes(&h)
3380            }
3381
3382            fn build_symlink_tar_core(params: &SymlinkParams, fmt: TarFormat) -> Header {
3383                let mut b = fmt.header_builder();
3384                b.path(params.path.as_bytes())
3385                    .unwrap()
3386                    .mode(0o777)
3387                    .unwrap()
3388                    .uid(params.uid)
3389                    .unwrap()
3390                    .gid(params.gid)
3391                    .unwrap()
3392                    .size(0)
3393                    .unwrap()
3394                    .mtime(params.mtime)
3395                    .unwrap()
3396                    .entry_type(EntryType::Symlink)
3397                    .link_name(params.target.as_bytes())
3398                    .unwrap();
3399                b.finish()
3400            }
3401
3402            fn build_symlink_tar_rs(params: &SymlinkParams, fmt: TarFormat) -> [u8; 512] {
3403                let mut h = fmt.tar_rs_header();
3404                h.set_path(&params.path).unwrap();
3405                h.set_mode(0o777);
3406                h.set_uid(params.uid);
3407                h.set_gid(params.gid);
3408                h.set_size(0);
3409                h.set_mtime(params.mtime);
3410                h.set_entry_type(tar::EntryType::Symlink);
3411                h.set_link_name(&params.target).unwrap();
3412                h.set_cksum();
3413                tar_rs_bytes(&h)
3414            }
3415
3416            fn build_dir_tar_core(params: &DirParams, fmt: TarFormat) -> Header {
3417                let mut b = fmt.header_builder();
3418                let path = if params.path.ends_with('/') {
3419                    params.path.clone()
3420                } else {
3421                    format!("{}/", params.path)
3422                };
3423                b.path(path.as_bytes())
3424                    .unwrap()
3425                    .mode(params.mode)
3426                    .unwrap()
3427                    .uid(params.uid)
3428                    .unwrap()
3429                    .gid(params.gid)
3430                    .unwrap()
3431                    .size(0)
3432                    .unwrap()
3433                    .mtime(params.mtime)
3434                    .unwrap()
3435                    .entry_type(EntryType::Directory);
3436                b.finish()
3437            }
3438
3439            fn build_dir_tar_rs(params: &DirParams, fmt: TarFormat) -> [u8; 512] {
3440                let mut h = fmt.tar_rs_header();
3441                let path = if params.path.ends_with('/') {
3442                    params.path.clone()
3443                } else {
3444                    format!("{}/", params.path)
3445                };
3446                h.set_path(&path).unwrap();
3447                h.set_mode(params.mode);
3448                h.set_uid(params.uid);
3449                h.set_gid(params.gid);
3450                h.set_size(0);
3451                h.set_mtime(params.mtime);
3452                h.set_entry_type(tar::EntryType::Directory);
3453                h.set_cksum();
3454                tar_rs_bytes(&h)
3455            }
3456
3457            fn build_file_header_setters(params: &FileParams, fmt: TarFormat) -> [u8; 512] {
3458                let mut h = fmt.our_header();
3459                h.set_path(params.path.as_bytes()).unwrap();
3460                h.set_mode(params.mode).unwrap();
3461                h.set_uid(params.uid).unwrap();
3462                h.set_gid(params.gid).unwrap();
3463                h.set_size(params.size).unwrap();
3464                h.set_mtime(params.mtime).unwrap();
3465                h.set_entry_type(EntryType::Regular);
3466                h.set_username(params.username.as_bytes()).unwrap();
3467                h.set_groupname(params.groupname.as_bytes()).unwrap();
3468                h.set_checksum();
3469                *h.as_bytes()
3470            }
3471
3472            proptest! {
3473                #![proptest_config(ProptestConfig::with_cases(256))]
3474
3475                #[test]
3476                fn test_file_builder_equivalence(
3477                    params in file_params_strategy(),
3478                    fmt in tar_format_strategy(),
3479                ) {
3480                    assert_headers_eq(
3481                        build_file_tar_core(&params, fmt).as_bytes(),
3482                        &build_file_tar_rs(&params, fmt),
3483                    );
3484                }
3485
3486                #[test]
3487                fn test_symlink_builder_equivalence(
3488                    params in symlink_params_strategy(),
3489                    fmt in tar_format_strategy(),
3490                ) {
3491                    assert_headers_eq(
3492                        build_symlink_tar_core(&params, fmt).as_bytes(),
3493                        &build_symlink_tar_rs(&params, fmt),
3494                    );
3495                }
3496
3497                #[test]
3498                fn test_dir_builder_equivalence(
3499                    params in dir_params_strategy(),
3500                    fmt in tar_format_strategy(),
3501                ) {
3502                    assert_headers_eq(
3503                        build_dir_tar_core(&params, fmt).as_bytes(),
3504                        &build_dir_tar_rs(&params, fmt),
3505                    );
3506                }
3507
3508                #[test]
3509                fn test_header_setters_equivalence(
3510                    params in file_params_strategy(),
3511                    fmt in tar_format_strategy(),
3512                ) {
3513                    assert_headers_eq(
3514                        &build_file_header_setters(&params, fmt),
3515                        &build_file_tar_rs(&params, fmt),
3516                    );
3517                }
3518            }
3519
3520            /// Test large values that require base-256 encoding.
3521            mod base256_equivalence {
3522                use super::*;
3523
3524                /// Strategy for large UID/GID values that require base-256.
3525                fn large_id_strategy() -> impl Strategy<Value = u64> {
3526                    prop_oneof![
3527                        Just(2097152u64),              // just over octal limit
3528                        Just(u32::MAX as u64),         // common large value
3529                        (2097152u64..u32::MAX as u64), // range requiring base-256
3530                    ]
3531                }
3532
3533                /// Create a minimal GNU regular-file header pair for field-level tests.
3534                fn default_headers() -> (Header, tar::Header) {
3535                    let mut ours = Header::new_gnu();
3536                    ours.set_path(b"test.txt").unwrap();
3537                    ours.set_mode(0o644).unwrap();
3538                    ours.set_uid(1000).unwrap();
3539                    ours.set_gid(1000).unwrap();
3540                    ours.set_size(0).unwrap();
3541                    ours.set_mtime(0).unwrap();
3542                    ours.set_entry_type(EntryType::Regular);
3543
3544                    let mut theirs = tar::Header::new_gnu();
3545                    theirs.set_path("test.txt").unwrap();
3546                    theirs.set_mode(0o644);
3547                    theirs.set_uid(1000);
3548                    theirs.set_gid(1000);
3549                    theirs.set_size(0);
3550                    theirs.set_mtime(0);
3551                    theirs.set_entry_type(tar::EntryType::Regular);
3552
3553                    (ours, theirs)
3554                }
3555
3556                #[test]
3557                fn test_large_uid_encoding() {
3558                    let (mut ours, mut theirs) = default_headers();
3559                    ours.set_uid(2_500_000).unwrap();
3560                    ours.set_checksum();
3561                    theirs.set_uid(2_500_000);
3562                    theirs.set_cksum();
3563
3564                    assert_eq!(&ours.as_bytes()[108..116], &theirs.as_bytes()[108..116]);
3565                    assert_eq!(ours.uid().unwrap(), 2_500_000);
3566                }
3567
3568                #[test]
3569                fn test_large_gid_encoding() {
3570                    let (mut ours, mut theirs) = default_headers();
3571                    ours.set_gid(3_000_000).unwrap();
3572                    ours.set_checksum();
3573                    theirs.set_gid(3_000_000);
3574                    theirs.set_cksum();
3575
3576                    assert_eq!(&ours.as_bytes()[116..124], &theirs.as_bytes()[116..124]);
3577                    assert_eq!(ours.gid().unwrap(), 3_000_000);
3578                }
3579
3580                proptest! {
3581                    #![proptest_config(ProptestConfig::with_cases(64))]
3582
3583                    #[test]
3584                    fn test_large_uid_proptest(uid in large_id_strategy()) {
3585                        let (mut ours, mut theirs) = default_headers();
3586                        ours.set_uid(uid).unwrap();
3587                        ours.set_checksum();
3588                        theirs.set_uid(uid);
3589                        theirs.set_cksum();
3590
3591                        prop_assert_eq!(
3592                            &ours.as_bytes()[108..116],
3593                            &theirs.as_bytes()[108..116],
3594                        );
3595                    }
3596
3597                    #[test]
3598                    fn test_large_gid_proptest(gid in large_id_strategy()) {
3599                        let (mut ours, mut theirs) = default_headers();
3600                        ours.set_gid(gid).unwrap();
3601                        ours.set_checksum();
3602                        theirs.set_gid(gid);
3603                        theirs.set_cksum();
3604
3605                        prop_assert_eq!(
3606                            &ours.as_bytes()[116..124],
3607                            &theirs.as_bytes()[116..124],
3608                        );
3609                    }
3610                }
3611            }
3612
3613            /// Test infallible `_small` setter variants roundtrip correctly
3614            /// and produce output equivalent to tar-rs for the same values.
3615            mod small_setter_tests {
3616                use super::*;
3617
3618                fn default_header_pair() -> (Header, tar::Header) {
3619                    let mut ours = Header::new_gnu();
3620                    ours.set_path(b"t.txt").unwrap();
3621                    ours.set_mode_small(0o644);
3622                    ours.set_uid(0).unwrap();
3623                    ours.set_gid(0).unwrap();
3624                    ours.set_size_small(0);
3625                    ours.set_mtime_small(0);
3626                    ours.set_entry_type(EntryType::Regular);
3627
3628                    let mut theirs = tar::Header::new_gnu();
3629                    theirs.set_path("t.txt").unwrap();
3630                    theirs.set_mode(0o644);
3631                    theirs.set_uid(0);
3632                    theirs.set_gid(0);
3633                    theirs.set_size(0);
3634                    theirs.set_mtime(0);
3635                    theirs.set_entry_type(tar::EntryType::Regular);
3636
3637                    (ours, theirs)
3638                }
3639
3640                proptest! {
3641                    #![proptest_config(ProptestConfig::with_cases(256))]
3642
3643                    #[test]
3644                    fn test_set_mode_small_roundtrip(mode: u16) {
3645                        let (mut ours, mut theirs) = default_header_pair();
3646                        ours.set_mode_small(mode);
3647                        ours.set_checksum();
3648                        theirs.set_mode(u32::from(mode));
3649                        theirs.set_cksum();
3650
3651                        prop_assert_eq!(ours.mode().unwrap(), u32::from(mode));
3652                        prop_assert_eq!(
3653                            &ours.as_bytes()[100..108],
3654                            &theirs.as_bytes()[100..108],
3655                        );
3656                    }
3657
3658                    #[test]
3659                    fn test_set_size_small_roundtrip(size: u32) {
3660                        let (mut ours, mut theirs) = default_header_pair();
3661                        ours.set_size_small(size);
3662                        ours.set_checksum();
3663                        theirs.set_size(u64::from(size));
3664                        theirs.set_cksum();
3665
3666                        prop_assert_eq!(ours.entry_size().unwrap(), u64::from(size));
3667                        prop_assert_eq!(
3668                            &ours.as_bytes()[124..136],
3669                            &theirs.as_bytes()[124..136],
3670                        );
3671                    }
3672
3673                    #[test]
3674                    fn test_set_mtime_small_roundtrip(mtime: u32) {
3675                        let (mut ours, mut theirs) = default_header_pair();
3676                        ours.set_mtime_small(mtime);
3677                        ours.set_checksum();
3678                        theirs.set_mtime(u64::from(mtime));
3679                        theirs.set_cksum();
3680
3681                        prop_assert_eq!(ours.mtime().unwrap(), u64::from(mtime));
3682                        prop_assert_eq!(
3683                            &ours.as_bytes()[136..148],
3684                            &theirs.as_bytes()[136..148],
3685                        );
3686                    }
3687
3688                    #[test]
3689                    fn test_set_device_small_roundtrip(major: u16, minor: u16) {
3690                        let mut header = Header::new_ustar();
3691                        header.set_device_small(major, minor);
3692
3693                        prop_assert_eq!(
3694                            header.device_major().unwrap().unwrap(),
3695                            u32::from(major),
3696                        );
3697                        prop_assert_eq!(
3698                            header.device_minor().unwrap().unwrap(),
3699                            u32::from(minor),
3700                        );
3701                    }
3702                }
3703            }
3704
3705            /// Test GNU long name/link extensions produce equivalent output.
3706            mod gnu_extensions_equivalence {
3707                use super::*;
3708                use crate::builder::EntryBuilder;
3709
3710                /// Strategy for generating long paths (101-300 bytes).
3711                fn long_path_strategy() -> impl Strategy<Value = String> {
3712                    // Generate paths like "aaa/bbb/ccc/..." that exceed 100 bytes
3713                    (3..15usize)
3714                        .prop_flat_map(|segments| {
3715                            proptest::collection::vec(
3716                                proptest::string::string_regex("[a-z]{5,20}").expect("valid regex"),
3717                                segments,
3718                            )
3719                        })
3720                        .prop_map(|parts| parts.join("/"))
3721                        .prop_filter("must exceed 100 bytes", |s| s.len() > 100 && s.len() < 300)
3722                }
3723
3724                /// Strategy for generating long link targets.
3725                fn long_link_strategy() -> impl Strategy<Value = String> {
3726                    long_path_strategy()
3727                }
3728
3729                /// Parameters for a long-path file entry with random metadata.
3730                #[derive(Debug, Clone)]
3731                struct LongPathFileParams {
3732                    path: String,
3733                    mode: u32,
3734                    uid: u64,
3735                    gid: u64,
3736                    mtime: u64,
3737                }
3738
3739                fn long_path_file_params_strategy() -> impl Strategy<Value = LongPathFileParams> {
3740                    (
3741                        long_path_strategy(),
3742                        mode_strategy(),
3743                        id_strategy(),
3744                        id_strategy(),
3745                        mtime_strategy(),
3746                    )
3747                        .prop_map(|(path, mode, uid, gid, mtime)| {
3748                            LongPathFileParams {
3749                                path,
3750                                mode,
3751                                uid,
3752                                gid,
3753                                mtime,
3754                            }
3755                        })
3756                }
3757
3758                /// Parameters for a symlink entry with long target.
3759                #[derive(Debug, Clone)]
3760                struct LongLinkParams {
3761                    path: String,
3762                    target: String,
3763                    uid: u64,
3764                    gid: u64,
3765                    mtime: u64,
3766                }
3767
3768                fn long_link_params_strategy() -> impl Strategy<Value = LongLinkParams> {
3769                    (
3770                        path_strategy(),      // short path for the symlink itself
3771                        long_link_strategy(), // long target
3772                        id_strategy(),
3773                        id_strategy(),
3774                        mtime_strategy(),
3775                    )
3776                        .prop_map(|(path, target, uid, gid, mtime)| {
3777                            LongLinkParams {
3778                                path,
3779                                target,
3780                                uid,
3781                                gid,
3782                                mtime,
3783                            }
3784                        })
3785                }
3786
3787                /// Extract all header blocks from a tar archive created by tar-rs.
3788                /// Uses `tar::Archive` in raw mode to see extension headers.
3789                fn extract_all_headers(tar_data: &[u8]) -> Vec<Header> {
3790                    let mut archive = tar::Archive::new(std::io::Cursor::new(tar_data));
3791                    archive
3792                        .entries()
3793                        .expect("tar entries")
3794                        .raw(true)
3795                        .map(|e| {
3796                            let e = e.expect("tar entry");
3797                            *Header::from_bytes(e.header().as_bytes())
3798                        })
3799                        .collect()
3800                }
3801
3802                /// Build a tar archive with a long path using tar-rs.
3803                fn build_long_path_with_tar_rs(params: &LongPathFileParams) -> Vec<u8> {
3804                    let mut builder = tar::Builder::new(Vec::new());
3805
3806                    let mut header = tar::Header::new_gnu();
3807                    header.set_mode(params.mode);
3808                    header.set_uid(params.uid);
3809                    header.set_gid(params.gid);
3810                    header.set_size(0);
3811                    header.set_mtime(params.mtime);
3812                    header.set_entry_type(tar::EntryType::Regular);
3813
3814                    builder
3815                        .append_data(&mut header, &params.path, std::io::empty())
3816                        .unwrap();
3817                    builder.into_inner().unwrap()
3818                }
3819
3820                /// Build entry headers with a long path using tar-core.
3821                fn build_long_path_with_tar_core(params: &LongPathFileParams) -> Vec<Header> {
3822                    let mut builder = EntryBuilder::new_gnu();
3823                    builder
3824                        .path(params.path.as_bytes())
3825                        .mode(params.mode)
3826                        .unwrap()
3827                        .uid(params.uid)
3828                        .unwrap()
3829                        .gid(params.gid)
3830                        .unwrap()
3831                        .size(0)
3832                        .unwrap()
3833                        .mtime(params.mtime)
3834                        .unwrap()
3835                        .entry_type(EntryType::Regular);
3836
3837                    builder.finish()
3838                }
3839
3840                /// Build a tar archive with a long symlink using tar-rs.
3841                fn build_long_link_with_tar_rs(params: &LongLinkParams) -> Vec<u8> {
3842                    let mut builder = tar::Builder::new(Vec::new());
3843
3844                    let mut header = tar::Header::new_gnu();
3845                    header.set_mode(0o777);
3846                    header.set_uid(params.uid);
3847                    header.set_gid(params.gid);
3848                    header.set_size(0);
3849                    header.set_mtime(params.mtime);
3850                    header.set_entry_type(tar::EntryType::Symlink);
3851                    builder
3852                        .append_link(&mut header, &params.path, &params.target)
3853                        .unwrap();
3854                    builder.into_inner().unwrap()
3855                }
3856
3857                /// Build entry headers with a long symlink target using tar-core.
3858                fn build_long_link_with_tar_core(params: &LongLinkParams) -> Vec<Header> {
3859                    let mut builder = EntryBuilder::new_gnu();
3860                    builder
3861                        .path(params.path.as_bytes())
3862                        .link_name(params.target.as_bytes())
3863                        .mode(0o777)
3864                        .unwrap()
3865                        .uid(params.uid)
3866                        .unwrap()
3867                        .gid(params.gid)
3868                        .unwrap()
3869                        .size(0)
3870                        .unwrap()
3871                        .mtime(params.mtime)
3872                        .unwrap()
3873                        .entry_type(EntryType::Symlink);
3874
3875                    builder.finish()
3876                }
3877
3878                /// Compare the extension and main headers from our builder
3879                /// against those extracted from a tar-rs archive.
3880                ///
3881                /// Our builder returns all blocks (headers + data), while
3882                /// `extract_all_headers` returns only header blocks, so we
3883                /// compare first (extension) and last (main) individually.
3884                ///
3885                /// Extension headers only need semantic equality (type, path,
3886                /// size) since metadata fields like mode/uid/gid are set
3887                /// differently by tar-rs vs tar-core (both are valid).
3888                fn compare_extension_headers(our_blocks: &[Header], tar_headers: &[Header]) {
3889                    assert!(our_blocks.len() >= 2, "expected extension + main headers");
3890                    assert!(tar_headers.len() >= 2, "expected extension + main headers");
3891
3892                    let our_ext = &our_blocks[0];
3893                    let tar_ext = &tar_headers[0];
3894                    assert_eq!(our_ext.entry_type(), tar_ext.entry_type(), "extension type");
3895                    assert_eq!(our_ext.path_bytes(), tar_ext.path_bytes(), "extension path");
3896                    assert_eq!(
3897                        our_ext.entry_size().unwrap(),
3898                        tar_ext.entry_size().unwrap(),
3899                        "extension size"
3900                    );
3901
3902                    // Main header: compare key fields. The linkname field
3903                    // can differ because tar-rs normalizes paths while our
3904                    // builder writes raw bytes truncated to 100 bytes.
3905                    let our_main = our_blocks.last().unwrap();
3906                    let tar_main = tar_headers.last().unwrap();
3907                    assert_eq!(our_main.entry_type(), tar_main.entry_type(), "main type");
3908                    assert_eq!(
3909                        our_main.mode().unwrap(),
3910                        tar_main.mode().unwrap(),
3911                        "main mode"
3912                    );
3913                    assert_eq!(our_main.uid().unwrap(), tar_main.uid().unwrap(), "main uid");
3914                    assert_eq!(our_main.gid().unwrap(), tar_main.gid().unwrap(), "main gid");
3915                    assert_eq!(
3916                        our_main.mtime().unwrap(),
3917                        tar_main.mtime().unwrap(),
3918                        "main mtime"
3919                    );
3920                }
3921
3922                #[test]
3923                fn test_gnu_longname_basic() {
3924                    let params = LongPathFileParams {
3925                        path: "a/".repeat(60) + "file.txt",
3926                        mode: 0o644,
3927                        uid: 1000,
3928                        gid: 1000,
3929                        mtime: 1234567890,
3930                    };
3931                    compare_extension_headers(
3932                        &build_long_path_with_tar_core(&params),
3933                        &extract_all_headers(&build_long_path_with_tar_rs(&params)),
3934                    );
3935                }
3936
3937                #[test]
3938                fn test_gnu_longlink_basic() {
3939                    let params = LongLinkParams {
3940                        path: "mylink".to_string(),
3941                        target: "/very/long/target/".repeat(10),
3942                        uid: 1000,
3943                        gid: 1000,
3944                        mtime: 1234567890,
3945                    };
3946                    compare_extension_headers(
3947                        &build_long_link_with_tar_core(&params),
3948                        &extract_all_headers(&build_long_link_with_tar_rs(&params)),
3949                    );
3950                }
3951
3952                proptest! {
3953                    #![proptest_config(ProptestConfig::with_cases(32))]
3954
3955                    #[test]
3956                    fn test_gnu_longname_equivalence(params in long_path_file_params_strategy()) {
3957                        compare_extension_headers(
3958                            &build_long_path_with_tar_core(&params),
3959                            &extract_all_headers(&build_long_path_with_tar_rs(&params)),
3960                        );
3961                    }
3962
3963                    #[test]
3964                    fn test_gnu_longlink_equivalence(params in long_link_params_strategy()) {
3965                        compare_extension_headers(
3966                            &build_long_link_with_tar_core(&params),
3967                            &extract_all_headers(&build_long_link_with_tar_rs(&params)),
3968                        );
3969                    }
3970                }
3971            }
3972
3973            /// Test PAX extension headers produce equivalent output.
3974            mod pax_extensions_equivalence {
3975                use super::*;
3976                use crate::builder::{EntryBuilder, PaxBuilder};
3977
/// Inputs for one PAX equivalence case: a file entry plus a single
/// extended-attribute key/value pair.
#[derive(Clone, Debug)]
struct PaxFileParams {
    /// Path of the file entry.
    path: String,
    /// Mode written to the main header.
    mode: u32,
    /// UID written to the main header.
    uid: u64,
    /// GID written to the main header.
    gid: u64,
    /// Modification time written to the main header.
    mtime: u64,
    /// Key of the PAX record.
    xattr_key: String,
    /// Value of the PAX record.
    xattr_value: String,
}
3989
3990                fn pax_file_params_strategy() -> impl Strategy<Value = PaxFileParams> {
3991                    (
3992                        path_strategy(),
3993                        mode_strategy(),
3994                        id_strategy(),
3995                        id_strategy(),
3996                        mtime_strategy(),
3997                        proptest::string::string_regex("SCHILY\\.xattr\\.[a-z]{1,20}")
3998                            .expect("valid regex"),
3999                        proptest::string::string_regex("[a-zA-Z0-9]{1,30}").expect("valid regex"),
4000                    )
4001                        .prop_map(
4002                            |(path, mode, uid, gid, mtime, xattr_key, xattr_value)| PaxFileParams {
4003                                path,
4004                                mode,
4005                                uid,
4006                                gid,
4007                                mtime,
4008                                xattr_key,
4009                                xattr_value,
4010                            },
4011                        )
4012                }
4013
4014                /// Build a tar with PAX extended headers using tar-rs.
4015                fn build_pax_with_tar_rs(params: &PaxFileParams) -> Vec<u8> {
4016                    let mut builder = tar::Builder::new(Vec::new());
4017
4018                    // Build PAX records manually
4019                    let mut pax_data = Vec::new();
4020                    let record =
4021                        format_pax_record(&params.xattr_key, params.xattr_value.as_bytes());
4022                    pax_data.extend_from_slice(record.as_bytes());
4023
4024                    // Create PAX header
4025                    let mut pax_header = tar::Header::new_ustar();
4026                    let pax_name = format!("PaxHeaders.0/{}", params.path);
4027                    pax_header.set_path(&pax_name).unwrap();
4028                    pax_header.set_size(pax_data.len() as u64);
4029                    pax_header.set_entry_type(tar::EntryType::XHeader);
4030                    pax_header.set_mode(0o644);
4031                    pax_header.set_uid(0);
4032                    pax_header.set_gid(0);
4033                    pax_header.set_mtime(0);
4034                    pax_header.set_cksum();
4035
4036                    builder
4037                        .append_data(&mut pax_header, &pax_name, pax_data.as_slice())
4038                        .unwrap();
4039
4040                    // Create main header
4041                    let mut header = tar::Header::new_ustar();
4042                    header.set_path(&params.path).unwrap();
4043                    header.set_mode(params.mode);
4044                    header.set_uid(params.uid);
4045                    header.set_gid(params.gid);
4046                    header.set_size(0);
4047                    header.set_mtime(params.mtime);
4048                    header.set_entry_type(tar::EntryType::Regular);
4049                    header.set_cksum();
4050
4051                    builder
4052                        .append_data(&mut header, &params.path, std::io::empty())
4053                        .unwrap();
4054                    builder.into_inner().unwrap()
4055                }
4056
/// Format a single PAX record as `"<len> <key>=<value>\n"`.
///
/// `<len>` is the decimal length in bytes of the whole record, counting the
/// length digits themselves, the separating space, the `=` and the trailing
/// newline.
///
/// `value` is converted with [`String::from_utf8_lossy`], so invalid UTF-8
/// sequences are replaced by U+FFFD. The length prefix is computed from the
/// converted text, so it always equals the byte length of the returned
/// string, even when the replacement changes the value's size.
fn format_pax_record(key: &str, value: &[u8]) -> String {
    // Convert first: the lossy text, not the raw input, is what gets emitted,
    // so it is the lossy text whose length must be declared.
    let value = String::from_utf8_lossy(value);

    // Everything except the length digits: ' ' + key + '=' + value + '\n'.
    let rest_len = 3 + key.len() + value.len();

    // The length field counts its own digits, so grow the digit count until
    // the total fits in that many digits.
    let mut len_len = 1;
    let mut max_len = 10;
    while rest_len + len_len >= max_len {
        len_len += 1;
        max_len *= 10;
    }
    let len = rest_len + len_len;
    format!("{} {}={}\n", len, key, value)
}
4070
4071                /// Build entry with PAX xattrs using tar-core.
4072                fn build_pax_with_tar_core(params: &PaxFileParams) -> Vec<Header> {
4073                    let mut builder = EntryBuilder::new_ustar();
4074                    builder
4075                        .path(params.path.as_bytes())
4076                        .mode(params.mode)
4077                        .unwrap()
4078                        .uid(params.uid)
4079                        .unwrap()
4080                        .gid(params.gid)
4081                        .unwrap()
4082                        .size(0)
4083                        .unwrap()
4084                        .mtime(params.mtime)
4085                        .unwrap()
4086                        .entry_type(EntryType::Regular)
4087                        .add_pax(&params.xattr_key, params.xattr_value.as_bytes());
4088
4089                    builder.finish()
4090                }
4091
/// A fixed xattr entry must expand to an extended header first and the
/// regular-file header last.
#[test]
fn test_pax_xattr_basic() {
    let params = PaxFileParams {
        path: String::from("testfile"),
        mode: 0o644,
        uid: 1000,
        gid: 1000,
        mtime: 1234567890,
        xattr_key: String::from("SCHILY.xattr.user.test"),
        xattr_value: String::from("value1"),
    };

    // tar-rs must accept the same parameters; its bytes are not inspected here.
    let _reference_archive = build_pax_with_tar_rs(&params);
    let headers = build_pax_with_tar_core(&params);

    // At minimum: the PAX extension header and the main header.
    assert!(headers.len() >= 2, "should have PAX extension");

    // The extension comes first, the real entry comes last.
    let (leading, trailing) = (&headers[0], headers.last().unwrap());
    assert_eq!(leading.entry_type(), EntryType::XHeader);
    assert_eq!(trailing.entry_type(), EntryType::Regular);
}
4119
/// A record written by `PaxBuilder` must be readable through `PaxExtensions`.
#[test]
fn test_pax_builder_record_format() {
    const KEY: &str = "SCHILY.xattr.user.test";

    // Encode one record...
    let mut records = PaxBuilder::new();
    records.add(KEY, b"hello");
    let encoded = records.finish();

    // ...and look it up again by key.
    let decoded = PaxExtensions::new(&encoded);
    assert_eq!(decoded.get(KEY), Some("hello"));
}
4132
4133                proptest! {
4134                    #![proptest_config(ProptestConfig::with_cases(32))]
4135
4136                    /// Test PAX records are correctly formatted with random key/value.
4137                    #[test]
4138                    fn test_pax_record_roundtrip(
4139                        key in "[a-zA-Z][a-zA-Z0-9.]{1,30}",
4140                        value in "[a-zA-Z0-9]{1,50}",
4141                    ) {
4142                        let mut pax = PaxBuilder::new();
4143                        pax.add(&key, value.as_bytes());
4144                        let data = pax.finish();
4145
4146                        let exts = PaxExtensions::new(&data);
4147                        let parsed = exts.get(&key);
4148                        prop_assert_eq!(parsed, Some(value.as_str()));
4149                    }
4150
4151                    /// Test PAX files with random metadata produce valid headers.
4152                    #[test]
4153                    fn test_pax_file_equivalence(params in pax_file_params_strategy()) {
4154                        let _tar_data = build_pax_with_tar_rs(&params);
4155                        let our_headers = build_pax_with_tar_core(&params);
4156
4157                        // We should have PAX header + data blocks + main header
4158                        prop_assert!(our_headers.len() >= 2, "should have PAX extension");
4159
4160                        // First header should be XHeader
4161                        let our_ext = &our_headers[0];
4162                        prop_assert_eq!(our_ext.entry_type(), EntryType::XHeader);
4163
4164                        // Last header should be the main entry
4165                        let our_main = our_headers.last().unwrap();
4166                        prop_assert_eq!(our_main.entry_type(), EntryType::Regular);
4167                        prop_assert_eq!(our_main.mode().unwrap(), params.mode);
4168                        prop_assert_eq!(our_main.uid().unwrap(), params.uid);
4169                        prop_assert_eq!(our_main.gid().unwrap(), params.gid);
4170                        prop_assert_eq!(our_main.mtime().unwrap(), params.mtime);
4171                    }
4172                }
4173            }
4174        }
4175    }
4176}
4177
4178// ============================================================================
4179// Kani Formal Verification Proofs
4180// ============================================================================
4181
#[cfg(kani)]
mod kani_proofs {
    use super::*;

    // Scope note: this module only holds proofs that finish in under 10s.
    // Octal/numeric encode/parse roundtrips go through stdlib `from_utf8` /
    // `from_str_radix`, whose unbounded internal loops CBMC cannot handle
    // efficiently, so those properties are covered by proptest instead.

    /// `truncate_null` must not panic on any prefix of a 16-byte buffer, and
    /// its result must never be longer than the input slice.
    #[kani::proof]
    #[kani::unwind(18)]
    fn check_truncate_null_panic_freedom() {
        let buffer: [u8; 16] = kani::any();
        let prefix_len: usize = kani::any();
        // Restrict to lengths that are valid for slicing `buffer`.
        kani::assume(prefix_len <= buffer.len());
        let truncated = truncate_null(&buffer[..prefix_len]);
        kani::assert(truncated.len() <= prefix_len, "result within bounds");
    }

    /// Converting any byte to an `EntryType` and back yields the same byte,
    /// except that the null byte comes back as `'0'`.
    #[kani::proof]
    fn check_entry_type_roundtrip() {
        let byte: u8 = kani::any();
        let back = EntryType::from_byte(byte).to_byte();
        match byte {
            b'\0' => kani::assert(back == b'0', "null byte canonicalizes to '0'"),
            _ => kani::assert(back == byte, "non-null bytes roundtrip exactly"),
        }
    }

    /// Every `EntryType` predicate can be called on the type produced from
    /// an arbitrary byte without panicking; the results are ignored.
    #[kani::proof]
    fn check_entry_type_predicates_dont_panic() {
        let entry = EntryType::from_byte(kani::any::<u8>());
        let _ = entry.is_file();
        let _ = entry.is_dir();
        let _ = entry.is_symlink();
        let _ = entry.is_hard_link();
        let _ = entry.is_character_special();
        let _ = entry.is_block_special();
        let _ = entry.is_fifo();
        let _ = entry.is_contiguous();
        let _ = entry.is_gnu_longname();
        let _ = entry.is_gnu_longlink();
        let _ = entry.is_gnu_sparse();
        let _ = entry.is_pax_global_extensions();
        let _ = entry.is_pax_local_extensions();
    }
}