// tar_core/parse.rs

//! Sans-IO tar archive parser.
//!
//! This module provides a sans-IO state machine parser for tar archives.
//! It operates on `&[u8]` slices directly (no `Read` trait bound), making
//! it suitable for:
//!
//! - Async I/O (tokio, async-std)
//! - Custom buffering strategies
//! - Zero-copy parsing in memory-mapped archives
//! - Embedding in other parsers
//!
//! In addition to the parser itself, this module contains the configuration
//! and error types it uses: [`Limits`] for security limits and [`ParseError`]
//! for error reporting.
//!
//! # Design
//!
//! The parser is a state machine that processes bytes and emits [`ParseEvent`]s.
//! The caller is responsible for:
//!
//! 1. Providing input data via [`Parser::parse`]
//! 2. Handling events (entries, sparse entries, global PAX headers,
//!    end-of-archive)
//! 3. Managing the buffer and reading more data when needed
//!
//! # Example
//!
//! ```
//! use tar_core::parse::{Parser, ParseEvent, Limits};
//!
//! let mut parser = Parser::new(Limits::default());
//!
//! // Simulated tar data (in practice, read from file/network)
//! let data = [0u8; 1024]; // Two zero blocks = end of archive
//!
//! match parser.parse(&data) {
//!     Ok(ParseEvent::End { consumed }) => {
//!         println!("End of archive after {} bytes", consumed);
//!     }
//!     Ok(event) => {
//!         println!("Got event {:?}", event);
//!     }
//!     Err(e) => {
//!         eprintln!("Parse error: {}", e);
//!     }
//! }
//! ```
47
48use alloc::borrow::Cow;
49use alloc::borrow::ToOwned;
50use alloc::format;
51use alloc::string::String;
52use alloc::vec::Vec;
53use core::str::Utf8Error;
54
55use thiserror::Error;
56use zerocopy::FromBytes;
57
58use crate::{
59    EntryType, GnuExtSparseHeader, Header, HeaderError, PaxError, PaxExtensions, SparseEntry,
60    HEADER_SIZE, PAX_GID, PAX_GNAME, PAX_GNU_SPARSE_MAJOR, PAX_GNU_SPARSE_MAP,
61    PAX_GNU_SPARSE_MINOR, PAX_GNU_SPARSE_NAME, PAX_GNU_SPARSE_NUMBYTES, PAX_GNU_SPARSE_OFFSET,
62    PAX_GNU_SPARSE_REALSIZE, PAX_GNU_SPARSE_SIZE, PAX_LINKPATH, PAX_MTIME, PAX_PATH,
63    PAX_SCHILY_XATTR, PAX_SIZE, PAX_UID, PAX_UNAME,
64};
65
66// ============================================================================
67// Limits
68// ============================================================================
69
/// Security limits enforced while parsing a tar archive.
///
/// Every field caps a resource that a malicious or malformed archive could
/// otherwise inflate: extension metadata, path length, runs of metadata
/// entries, and sparse maps.
///
/// # Example
///
/// ```
/// use tar_core::parse::Limits;
///
/// // Start from the defaults...
/// let limits = Limits::default();
///
/// // ...or override selected fields.
/// let limits = Limits {
///     max_metadata_size: 64 * 1024,
///     // Use libc::PATH_MAX when extracting to disk
///     max_path_len: Some(4096),
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Limits {
    /// Aggregate byte budget for the extension metadata of one entry.
    ///
    /// PAX extended headers, GNU long name data and GNU long link data that
    /// belong to the same file entry are counted together. Going over the
    /// budget yields [`ParseError::MetadataTooLarge`].
    ///
    /// Default: 1 MiB (1,048,576 bytes).
    pub max_metadata_size: u32,

    /// Upper bound, in bytes, on paths and link targets; `None` disables
    /// the check.
    ///
    /// A path or link target longer than the bound is rejected with
    /// [`ParseError::PathTooLong`].
    ///
    /// When extracting onto a real filesystem, set this to `libc::PATH_MAX`
    /// (4096 on Linux, 1024 on macOS) or the equivalent platform constant.
    ///
    /// Default: `None`.
    pub max_path_len: Option<u32>,

    /// How many metadata entries may appear back to back before a real entry.
    ///
    /// Guards against archives made up solely of metadata entries (GNU long
    /// name, PAX headers) with no file entry ever following. Going over the
    /// bound yields [`ParseError::TooManyPendingEntries`].
    ///
    /// Default: 16 entries.
    pub max_pending_entries: usize,

    /// How many sparse data regions (chunks) a single sparse file may declare.
    ///
    /// Guards against unbounded allocation when an archive claims a huge
    /// sparse map (compare CVE-2025-58183 in Go's `archive/tar`).
    ///
    /// In the old GNU sparse format each 512-byte extension block carries 21
    /// descriptors, so 1,000 entries need ~48 extension blocks (~24 KiB) and
    /// the default of 10,000 entries needs roughly 480 blocks (~240 KiB).
    ///
    /// Default: 10000.
    pub max_sparse_entries: usize,
}
137
138impl Default for Limits {
139    fn default() -> Self {
140        Self {
141            max_metadata_size: 1024 * 1024, // 1 MiB
142            max_path_len: None,
143            max_pending_entries: 16,
144            max_sparse_entries: 10_000,
145        }
146    }
147}
148
149impl Limits {
150    /// Create a new `Limits` with default values.
151    #[must_use]
152    pub fn new() -> Self {
153        Self::default()
154    }
155
156    /// Create permissive limits suitable for trusted archives.
157    ///
158    /// This sets very high limits that effectively disable most checks.
159    /// Only use this for archives from trusted sources.
160    #[must_use]
161    pub fn permissive() -> Self {
162        Self {
163            max_metadata_size: u32::MAX,
164            max_path_len: None,
165            max_pending_entries: usize::MAX,
166            max_sparse_entries: 1_000_000,
167        }
168    }
169
170    /// Check a path length against the configured limit.
171    ///
172    /// Returns `Ok(())` if the path is within the limit (or no limit is set),
173    /// or `Err(ParseError::PathTooLong)` if it exceeds it.
174    pub fn check_path_len(&self, len: usize) -> Result<()> {
175        if let Some(limit) = self.max_path_len {
176            if len > limit as usize {
177                return Err(ParseError::PathTooLong { len, limit });
178            }
179        }
180        Ok(())
181    }
182}
183
184// ============================================================================
185// Errors
186// ============================================================================
187
188/// Errors that can occur during tar archive parsing.
189#[derive(Debug, Error)]
190pub enum ParseError {
191    /// I/O error from the underlying reader.
192    #[cfg(feature = "std")]
193    #[error("I/O error: {0}")]
194    Io(#[from] std::io::Error),
195
196    /// Header parsing error (checksum, invalid octal, etc.).
197    #[error("header error: {0}")]
198    Header(#[from] HeaderError),
199
200    /// PAX extension parsing error.
201    #[error("PAX error: {0}")]
202    Pax(#[from] PaxError),
203
204    /// Invalid UTF-8 in PAX key.
205    #[error("invalid UTF-8 in PAX key: {0}")]
206    InvalidUtf8(#[from] Utf8Error),
207
208    /// Path exceeds configured maximum length.
209    #[error("path exceeds limit: {len} bytes > {limit} bytes")]
210    PathTooLong {
211        /// Actual path length.
212        len: usize,
213        /// Configured limit.
214        limit: u32,
215    },
216
217    /// Extension metadata exceeds configured maximum size.
218    ///
219    /// The aggregate size of all extension data (PAX headers, GNU long
220    /// name/link) for a single entry exceeded [`Limits::max_metadata_size`].
221    #[error("metadata exceeds limit: {size} bytes > {limit} bytes")]
222    MetadataTooLarge {
223        /// Total metadata size that would result.
224        size: u64,
225        /// Configured limit.
226        limit: u32,
227    },
228
229    /// Duplicate GNU long name entry without an intervening actual entry.
230    #[error("duplicate GNU long name entry")]
231    DuplicateGnuLongName,
232
233    /// Duplicate GNU long link entry without an intervening actual entry.
234    #[error("duplicate GNU long link entry")]
235    DuplicateGnuLongLink,
236
237    /// Duplicate PAX extended header without an intervening actual entry.
238    #[error("duplicate PAX extended header")]
239    DuplicatePaxHeader,
240
241    /// Metadata entries (GNU long name, PAX, etc.) found but no actual entry followed.
242    #[error("metadata entries without a following actual entry")]
243    OrphanedMetadata,
244
245    /// Too many consecutive metadata entries (possible infinite loop or malicious archive).
246    #[error("too many pending metadata entries: {count} > {limit}")]
247    TooManyPendingEntries {
248        /// Number of pending metadata entries.
249        count: usize,
250        /// Configured limit.
251        limit: usize,
252    },
253
254    /// Too many sparse entries (possible denial-of-service attack).
255    #[error("too many sparse entries: {count} > {limit}")]
256    TooManySparseEntries {
257        /// Number of sparse entries found.
258        count: usize,
259        /// Configured limit.
260        limit: usize,
261    },
262
263    /// Sparse entry type present but header is not GNU format.
264    #[error("sparse entry type but header is not GNU format")]
265    SparseNotGnu,
266
267    /// A PAX sparse map field is malformed.
268    #[error("invalid PAX sparse map: {0}")]
269    InvalidPaxSparseMap(Cow<'static, str>),
270
271    /// A PAX extension value failed to parse.
272    #[error("invalid PAX {key} value: {value:?}")]
273    InvalidPaxValue {
274        /// The PAX key (e.g. "uid", "size").
275        key: &'static str,
276        /// The raw value string.
277        value: Cow<'static, str>,
278    },
279
280    /// Entry path is empty after applying all overrides (GNU long name, PAX path, etc.).
281    #[error("entry has empty path")]
282    EmptyPath,
283
284    /// Entry size in header is invalid (e.g., overflow when computing padded size).
285    #[error("invalid entry size: {0}")]
286    InvalidSize(u64),
287
288    /// Unexpected EOF while reading entry content or padding.
289    #[error("unexpected EOF at position {pos}")]
290    UnexpectedEof {
291        /// Position in the stream where EOF occurred.
292        pos: u64,
293    },
294}
295
296/// Result type for parsing operations.
297pub type Result<T> = core::result::Result<T, ParseError>;
298
299// ============================================================================
300// Parser
301// ============================================================================
302
303/// Events emitted by the sans-IO parser.
304#[derive(Debug)]
305#[allow(clippy::large_enum_variant)]
306pub enum ParseEvent<'a> {
307    /// Need more data to continue parsing.
308    ///
309    /// No bytes are consumed from the input when this event is returned.
310    /// The caller should ensure at least `min_bytes` bytes are available
311    /// before calling `parse` again with the same (or larger) buffer.
312    NeedData {
313        /// Minimum number of bytes needed to make progress.
314        min_bytes: usize,
315    },
316
317    /// A complete entry header has been parsed.
318    ///
319    /// The entry contains resolved metadata (path, link target, etc.) with
320    /// GNU long name/link and PAX extensions applied.
321    ///
322    /// After this event, the caller must read or skip `entry.size` bytes
323    /// of content plus padding to the next 512-byte boundary before
324    /// calling `parse()` again with the next header bytes.
325    Entry {
326        /// Number of bytes consumed from the input for this entry's header(s).
327        consumed: usize,
328        /// The parsed entry with resolved metadata.
329        entry: ParsedEntry<'a>,
330    },
331
332    /// A GNU sparse file entry has been parsed.
333    ///
334    /// This is emitted instead of [`Entry`](ParseEvent::Entry) when the entry
335    /// type is `GnuSparse` (type 'S'). The sparse map describes which regions
336    /// of the logical file contain real data; gaps are implicitly zero-filled.
337    ///
338    /// After this event, the caller must read or skip `entry.size` bytes
339    /// of content (the on-disk data for the sparse regions) plus padding to
340    /// the next 512-byte boundary before calling `parse()` again. The
341    /// `consumed` count already includes any GNU sparse extension blocks
342    /// that followed the header.
343    SparseEntry {
344        /// Number of bytes consumed from the input for this entry's header(s),
345        /// including any GNU sparse extension blocks.
346        consumed: usize,
347        /// The parsed entry with resolved metadata.
348        /// `entry.size` is the on-disk content size (sum of sparse chunk
349        /// lengths). The logical file size is `real_size`.
350        entry: ParsedEntry<'a>,
351        /// The sparse data map: regions of real data within the logical file.
352        sparse_map: Vec<SparseEntry>,
353        /// The logical (uncompressed) size of the file, from the GNU header's
354        /// `realsize` field.
355        real_size: u64,
356    },
357
358    /// A PAX global extended header (type 'g') has been parsed.
359    ///
360    /// Per POSIX, global headers apply default attributes to all subsequent
361    /// entries in the archive. However, this parser does **not** apply them
362    /// automatically — it surfaces the raw data so the caller can decide
363    /// how to handle it (e.g., merge into a defaults table, ignore, etc.).
364    ///
365    /// The `pax_data` can be parsed with
366    /// [`PaxExtensions::new`](crate::PaxExtensions::new).
367    GlobalExtensions {
368        /// Number of bytes consumed from the input (header + padded content).
369        consumed: usize,
370        /// The raw PAX key-value data from the global header.
371        pax_data: &'a [u8],
372    },
373
374    /// Archive end marker reached (two consecutive zero blocks, or clean EOF).
375    End {
376        /// Number of bytes consumed from the input.
377        consumed: usize,
378    },
379}
380
381impl<'a> ParseEvent<'a> {
382    /// Adjust byte offsets in this event to account for `n` bytes that were
383    /// already consumed from the front of the original input before the
384    /// sub-slice was handed to a recursive `parse_header` call.
385    ///
386    /// For `Entry`, `SparseEntry`, and `End`, `n` is added to `consumed`.
387    ///
388    /// For `NeedData`, `n` is added to `min_bytes` so the requirement is
389    /// expressed relative to the *original* input buffer, not the sub-slice.
390    fn add_consumed(self, n: usize) -> Self {
391        match self {
392            ParseEvent::NeedData { min_bytes } => ParseEvent::NeedData {
393                min_bytes: min_bytes.saturating_add(n),
394            },
395            ParseEvent::Entry { consumed, entry } => ParseEvent::Entry {
396                consumed: consumed.saturating_add(n),
397                entry,
398            },
399            ParseEvent::SparseEntry {
400                consumed,
401                entry,
402                sparse_map,
403                real_size,
404            } => ParseEvent::SparseEntry {
405                consumed: consumed.saturating_add(n),
406                entry,
407                sparse_map,
408                real_size,
409            },
410            ParseEvent::GlobalExtensions { consumed, pax_data } => ParseEvent::GlobalExtensions {
411                consumed: consumed.saturating_add(n),
412                pax_data,
413            },
414            ParseEvent::End { consumed } => ParseEvent::End {
415                consumed: consumed.saturating_add(n),
416            },
417        }
418    }
419}
420
421/// A fully-resolved tar entry with all extensions applied.
422///
423/// Borrowed data comes from the input slice, so the entry is valid only
424/// as long as the input buffer is live.
425#[derive(Debug)]
426pub struct ParsedEntry<'a> {
427    /// The raw 512-byte header.
428    pub header: &'a Header,
429
430    /// The entry type (Regular, Directory, Symlink, etc.).
431    pub entry_type: EntryType,
432
433    /// The resolved file path.
434    ///
435    /// Priority: PAX `path` > GNU long name > header `name` (+ UStar `prefix`).
436    pub path: Cow<'a, [u8]>,
437
438    /// The resolved link target (for symlinks and hardlinks).
439    ///
440    /// Priority: PAX `linkpath` > GNU long link > header `linkname`.
441    pub link_target: Option<Cow<'a, [u8]>>,
442
443    /// File mode/permissions.
444    pub mode: u32,
445
446    /// Owner UID.
447    pub uid: u64,
448
449    /// Owner GID.
450    pub gid: u64,
451
452    /// Modification time as Unix timestamp.
453    pub mtime: u64,
454
455    /// Content size in bytes.
456    pub size: u64,
457
458    /// User name.
459    pub uname: Option<Cow<'a, [u8]>>,
460
461    /// Group name.
462    pub gname: Option<Cow<'a, [u8]>>,
463
464    /// Device major number (for block/char devices).
465    pub dev_major: Option<u32>,
466
467    /// Device minor number (for block/char devices).
468    pub dev_minor: Option<u32>,
469
470    /// Extended attributes from PAX `SCHILY.xattr.*` entries.
471    #[allow(clippy::type_complexity)]
472    pub xattrs: Vec<(Cow<'a, [u8]>, Cow<'a, [u8]>)>,
473
474    /// Raw PAX extended header data, if a PAX `'x'` entry preceded this entry.
475    ///
476    /// This is the unprocessed content of the PAX extension entry, preserved
477    /// so that callers can iterate all PAX key-value pairs (not just the ones
478    /// tar-core resolves into struct fields). Parse it with
479    /// [`PaxExtensions::new`](crate::PaxExtensions::new).
480    pub pax: Option<&'a [u8]>,
481}
482
483impl<'a> ParsedEntry<'a> {
484    /// Get the path as a lossy UTF-8 string.
485    #[must_use]
486    pub fn path_lossy(&self) -> Cow<'_, str> {
487        String::from_utf8_lossy(&self.path)
488    }
489
490    /// Get the link target as a lossy UTF-8 string, if present.
491    #[must_use]
492    pub fn link_target_lossy(&self) -> Option<Cow<'_, str>> {
493        self.link_target
494            .as_ref()
495            .map(|t| String::from_utf8_lossy(t))
496    }
497
498    /// Check if this is a regular file.
499    #[must_use]
500    pub fn is_file(&self) -> bool {
501        self.entry_type.is_file()
502    }
503
504    /// Check if this is a directory.
505    #[must_use]
506    pub fn is_dir(&self) -> bool {
507        self.entry_type.is_dir()
508    }
509
510    /// Check if this is a symbolic link.
511    #[must_use]
512    pub fn is_symlink(&self) -> bool {
513        self.entry_type.is_symlink()
514    }
515
516    /// Check if this is a hard link.
517    #[must_use]
518    pub fn is_hard_link(&self) -> bool {
519        self.entry_type.is_hard_link()
520    }
521
522    /// Get the padded size (rounded up to block boundary).
523    #[must_use]
524    pub fn padded_size(&self) -> u64 {
525        self.size.next_multiple_of(HEADER_SIZE as u64)
526    }
527}
528
/// Where the parser state machine currently stands.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// The next input is expected to start with a header block.
    ReadHeader,
    /// The end-of-archive marker has been seen; nothing more will be parsed.
    Done,
}
537
/// Which flavour of extension header is currently being handled.
///
/// `parse_header` selects the variant from the header's entry type before
/// handing over to `handle_extension`.
#[derive(Debug, Clone, Copy)]
enum ExtensionKind {
    /// Chosen for `EntryType::GnuLongName` headers.
    GnuLongName,
    /// Chosen for `EntryType::GnuLongLink` headers.
    GnuLongLink,
    /// Chosen for `EntryType::XHeader` (per-entry PAX) headers.
    Pax,
}
545
/// Extension data collected so far for the entry that is still to come.
///
/// A value of this type is passed along the `parse_header` →
/// `handle_extension` → `parse_header` recursion of one `parse()` call. An
/// extension chain is either resolved completely within that call or thrown
/// away on `NeedData`, so all data can simply be borrowed from the input
/// slice without allocating.
#[derive(Debug, Default, Clone, Copy)]
struct PendingMetadata<'a> {
    // NOTE(review): the three slices presumably hold the content of the
    // matching extension entry (GNU long name, GNU long link, PAX 'x');
    // they are filled outside this chunk — confirm in `handle_extension`.
    gnu_long_name: Option<&'a [u8]>,
    gnu_long_link: Option<&'a [u8]>,
    pax_extensions: Option<&'a [u8]>,
    /// Metadata-entry counter; `parse_header` rejects the archive once it
    /// exceeds `Limits::max_pending_entries`.
    count: usize,
    /// Sum of the sizes of all extension data gathered so far, in bytes.
    metadata_size: u64,
}
561
562/// Context for GNU sparse entries, passed from `handle_gnu_sparse` to
563/// `emit_entry` to produce a `ParseEvent::SparseEntry`.
564struct SparseContext {
565    sparse_map: Vec<SparseEntry>,
566    real_size: u64,
567    /// Number of bytes consumed by extension blocks (not counting the
568    /// main header itself).
569    ext_consumed: usize,
570}
571
572impl PendingMetadata<'_> {
573    fn is_empty(&self) -> bool {
574        self.gnu_long_name.is_none()
575            && self.gnu_long_link.is_none()
576            && self.pax_extensions.is_none()
577    }
578}
579
580/// Check PAX extensions for GNU sparse version.
581///
582/// Returns `Some((major, minor))` if `GNU.sparse.major` and
583/// `GNU.sparse.minor` are both present and parseable, `None` if
584/// the keys are absent. When `ignore_errors` is true, malformed values
585/// are silently skipped instead of producing errors.
586fn pax_sparse_version(pax: &[u8], ignore_errors: bool) -> Result<Option<(u64, u64)>> {
587    let mut major = None;
588    let mut minor = None;
589    for ext in PaxExtensions::new(pax) {
590        let ext = ext?;
591        let key = match ext.key() {
592            Ok(k) => k,
593            Err(_) if ignore_errors => continue,
594            Err(e) => return Err(ParseError::from(e)),
595        };
596        match key {
597            PAX_GNU_SPARSE_MAJOR => {
598                let s = match ext.value() {
599                    Ok(s) => s,
600                    Err(_) if ignore_errors => continue,
601                    Err(_) => {
602                        return Err(ParseError::InvalidPaxValue {
603                            key: PAX_GNU_SPARSE_MAJOR,
604                            value: Cow::Borrowed("<non-UTF-8>"),
605                        })
606                    }
607                };
608                match s.parse::<u64>() {
609                    Ok(v) => major = Some(v),
610                    Err(_) if ignore_errors => {}
611                    Err(_) => {
612                        return Err(ParseError::InvalidPaxValue {
613                            key: PAX_GNU_SPARSE_MAJOR,
614                            value: s.to_owned().into(),
615                        })
616                    }
617                }
618            }
619            PAX_GNU_SPARSE_MINOR => {
620                let s = match ext.value() {
621                    Ok(s) => s,
622                    Err(_) if ignore_errors => continue,
623                    Err(_) => {
624                        return Err(ParseError::InvalidPaxValue {
625                            key: PAX_GNU_SPARSE_MINOR,
626                            value: Cow::Borrowed("<non-UTF-8>"),
627                        })
628                    }
629                };
630                match s.parse::<u64>() {
631                    Ok(v) => minor = Some(v),
632                    Err(_) if ignore_errors => {}
633                    Err(_) => {
634                        return Err(ParseError::InvalidPaxValue {
635                            key: PAX_GNU_SPARSE_MINOR,
636                            value: s.to_owned().into(),
637                        })
638                    }
639                }
640            }
641            _ => {}
642        }
643        if major.is_some() && minor.is_some() {
644            break;
645        }
646    }
647    match (major, minor) {
648        (Some(maj), Some(min)) => Ok(Some((maj, min))),
649        _ => Ok(None),
650    }
651}
652
653/// Sans-IO tar archive parser.
654///
655/// This parser operates as a state machine on `&[u8]` input slices.
656/// It does not perform any I/O itself - the caller is responsible for
657/// providing data and handling the parsed events.
658///
659/// # Usage
660///
661/// The caller feeds header bytes to `parse()`. On `Entry`, the caller
662/// reads/skips `entry.size` bytes of content (plus padding to the next
663/// 512-byte boundary) from its own I/O source, then calls `parse()`
664/// again with the next header bytes. The parser does not see or track
665/// content bytes.
666///
667/// ```ignore
668/// let mut parser = Parser::new(Limits::default());
669/// let mut buf = vec![0u8; 65536];
670/// let mut filled = 0;
671///
672/// loop {
673///     match parser.parse(&buf[..filled]) {
674///         Ok(ParseEvent::NeedData { min_bytes }) => {
675///             let n = read_more(&mut buf[filled..])?;
676///             filled += n;
677///             if n == 0 && filled < min_bytes {
678///                 return Err("unexpected EOF");
679///             }
680///         }
681///         Ok(ParseEvent::Entry { consumed, entry }) => {
682///             process_entry(&entry);
683///             // Read/skip entry.size bytes + padding, then clear buf
684///             skip_content(entry.padded_size())?;
685///             filled = 0;
686///         }
687///         Ok(ParseEvent::End { .. }) => break,
688///         Err(e) => return Err(e),
689///     }
690/// }
691/// ```
692#[derive(Debug)]
693pub struct Parser {
694    limits: Limits,
695    state: State,
696    /// When true, entries with empty paths are allowed through instead of
697    /// returning [`ParseError::EmptyPath`].
698    allow_empty_path: bool,
699    /// When false, header checksum verification is skipped. This is useful
700    /// for fuzzing, where random input almost never has valid checksums,
701    /// preventing the fuzzer from exercising deeper parser logic.
702    ///
703    /// Default: `true`.
704    verify_checksums: bool,
705    /// When true, malformed PAX extension values (invalid UTF-8, unparseable
706    /// integers for uid/gid/size/mtime) are silently skipped instead of
707    /// producing errors. This matches the behavior of many real-world tar
708    /// implementations.
709    ///
710    /// Default: `false`.
711    ignore_pax_errors: bool,
712}
713
714impl Parser {
715    /// Create a new parser with the given limits.
716    #[must_use]
717    pub fn new(limits: Limits) -> Self {
718        Self {
719            limits,
720            state: State::ReadHeader,
721            allow_empty_path: false,
722            verify_checksums: true,
723            ignore_pax_errors: false,
724        }
725    }
726
727    /// Allow entries with empty paths instead of rejecting them with
728    /// [`ParseError::EmptyPath`].
729    pub fn set_allow_empty_path(&mut self, allow: bool) {
730        self.allow_empty_path = allow;
731    }
732
733    /// Control whether header checksums are verified during parsing.
734    ///
735    /// When set to `false`, the parser skips [`Header::verify_checksum`]
736    /// calls, accepting headers regardless of their checksum field. This
737    /// is primarily useful for fuzz testing, where random input almost
738    /// never produces valid checksums, preventing the fuzzer from reaching
739    /// deeper parser code paths.
740    ///
741    /// Default: `true`.
742    pub fn set_verify_checksums(&mut self, verify: bool) {
743        self.verify_checksums = verify;
744    }
745
746    /// Control whether malformed PAX extension values are silently ignored.
747    ///
748    /// When set to `true`, PAX values that fail to parse (invalid UTF-8,
749    /// unparseable integers for `uid`, `gid`, `size`, `mtime`) are skipped
750    /// instead of producing [`ParseError::InvalidPaxValue`] errors. This
751    /// matches the lenient behavior of many real-world tar implementations.
752    ///
753    /// Default: `false` (malformed values produce errors).
754    pub fn set_ignore_pax_errors(&mut self, ignore: bool) {
755        self.ignore_pax_errors = ignore;
756    }
757
758    /// Create a new parser with default limits.
759    #[must_use]
760    pub fn with_defaults() -> Self {
761        Self::new(Limits::default())
762    }
763
764    /// Get the current limits.
765    #[must_use]
766    pub fn limits(&self) -> &Limits {
767        &self.limits
768    }
769
770    /// Check if the parser is done (archive complete).
771    #[must_use]
772    pub fn is_done(&self) -> bool {
773        self.state == State::Done
774    }
775
776    /// Parse the next event from the input buffer.
777    ///
778    /// Returns a [`ParseEvent`] on success. `Entry` and `End` events include
779    /// a `consumed` field indicating how many bytes were consumed from the
780    /// input; the caller should advance past that many bytes in their buffer.
781    ///
782    /// # Events
783    ///
784    /// - `NeedData { min_bytes }`: Need at least `min_bytes` more data (nothing consumed)
785    /// - `Entry { consumed, entry }`: A complete entry header; caller must handle content
786    /// - `End { consumed }`: Archive is complete
787    ///
788    /// After receiving an `Entry` event, the caller is responsible for
789    /// reading or skipping `entry.size` bytes of content (plus padding to
790    /// the next 512-byte boundary) before calling `parse()` again.
791    pub fn parse<'a>(&mut self, input: &'a [u8]) -> Result<ParseEvent<'a>> {
792        match self.state {
793            State::Done => Ok(ParseEvent::End { consumed: 0 }),
794            State::ReadHeader => self.parse_header(input, PendingMetadata::default()),
795        }
796    }
797
798    /// Parse a header from the input.
799    fn parse_header<'a>(
800        &mut self,
801        input: &'a [u8],
802        slices: PendingMetadata<'a>,
803    ) -> Result<ParseEvent<'a>> {
804        // Need at least one header block
805        if input.len() < HEADER_SIZE {
806            return Ok(ParseEvent::NeedData {
807                min_bytes: HEADER_SIZE,
808            });
809        }
810
811        // Check for zero block (end of archive marker).
812        //
813        // NB: No state mutation happens before a potential NeedData return,
814        // so the caller can safely retry with more data.
815        let header_bytes: &[u8; HEADER_SIZE] = input[..HEADER_SIZE]
816            .try_into()
817            .expect("already checked input.len() >= HEADER_SIZE");
818        if header_bytes.iter().all(|&b| b == 0) {
819            // Need a second block to decide whether this is end-of-archive
820            // or a stray zero block.
821            if input.len() < 2 * HEADER_SIZE {
822                return Ok(ParseEvent::NeedData {
823                    min_bytes: 2 * HEADER_SIZE,
824                });
825            }
826            // Check second block
827            let second_block = &input[HEADER_SIZE..2 * HEADER_SIZE];
828            if second_block.iter().all(|&b| b == 0) {
829                self.state = State::Done;
830                if !slices.is_empty() {
831                    return Err(ParseError::OrphanedMetadata);
832                }
833                return Ok(ParseEvent::End {
834                    consumed: 2 * HEADER_SIZE,
835                });
836            }
837            // Not end of archive — single stray zero block; skip it and
838            // continue with the next block as a header.
839            return self
840                .parse_header(&input[HEADER_SIZE..], slices)
841                .map(|e| e.add_consumed(HEADER_SIZE));
842        }
843
844        // Check pending entry limit
845        if slices.count > self.limits.max_pending_entries {
846            return Err(ParseError::TooManyPendingEntries {
847                count: slices.count,
848                limit: self.limits.max_pending_entries,
849            });
850        }
851
852        // Parse header
853        let header = Header::from_bytes(header_bytes);
854        if self.verify_checksums {
855            header.verify_checksum()?;
856        }
857
858        let entry_type = header.entry_type();
859        let size = header.entry_size()?;
860        let padded_size = size
861            .checked_next_multiple_of(HEADER_SIZE as u64)
862            .ok_or(ParseError::InvalidSize(size))?;
863
864        // Metadata entry types (GNU long name/link, PAX headers, GNU sparse)
865        // only make sense in archives that actually use those formats. A V7-
866        // style header whose type flag byte happens to be 'L' or 'x' should
867        // be treated as a regular entry, not as a metadata extension. This
868        // matches tar-rs's `is_recognized_header` guard.
869        let is_extension_format = header.is_gnu() || header.is_ustar();
870        match entry_type {
871            EntryType::GnuLongName if is_extension_format => {
872                self.handle_extension(input, size, padded_size, ExtensionKind::GnuLongName, slices)
873            }
874            EntryType::GnuLongLink if is_extension_format => {
875                self.handle_extension(input, size, padded_size, ExtensionKind::GnuLongLink, slices)
876            }
877            EntryType::XHeader if is_extension_format => {
878                self.handle_extension(input, size, padded_size, ExtensionKind::Pax, slices)
879            }
880            // Global PAX headers (type 'g') are defined by POSIX
881            // independently of the UStar/GNU magic, so we always handle
882            // them here. Routing through emit_entry would fail because
883            // global headers have arbitrary metadata fields.
884            EntryType::XGlobalHeader => {
885                // Check size limit
886                if size > self.limits.max_metadata_size as u64 {
887                    return Err(ParseError::MetadataTooLarge {
888                        size,
889                        limit: self.limits.max_metadata_size,
890                    });
891                }
892
893                let total_size = (HEADER_SIZE as u64)
894                    .checked_add(padded_size)
895                    .ok_or(ParseError::InvalidSize(size))?;
896                if (input.len() as u64) < total_size {
897                    return Ok(ParseEvent::NeedData {
898                        min_bytes: total_size as usize,
899                    });
900                }
901
902                let content_start = HEADER_SIZE;
903                let content_end = content_start + size as usize;
904                let pax_data = &input[content_start..content_end];
905
906                Ok(ParseEvent::GlobalExtensions {
907                    consumed: total_size as usize,
908                    pax_data,
909                })
910            }
911            EntryType::GnuSparse if is_extension_format => {
912                self.handle_gnu_sparse(input, header, size, slices)
913            }
914            _ => {
915                // Check for PAX v1.0 sparse before emitting — it requires
916                // reading the sparse map from the data stream.
917                let sparse_version = if let Some(pax) = slices.pax_extensions {
918                    pax_sparse_version(pax, self.ignore_pax_errors)?
919                } else {
920                    None
921                };
922                if sparse_version == Some((1, 0)) {
923                    self.handle_pax_sparse_v1(input, header, size, slices)
924                } else {
925                    // Actual entry — emit_entry handles v0.0/v0.1 PAX sparse
926                    // inline during PAX extension processing.
927                    self.emit_entry(header, size, None, slices)
928                }
929            }
930        }
931    }
932
933    /// Process a GNU long name/link or PAX extension entry.
934    ///
935    /// Extracts the extension data as a borrowed slice (zero-copy), adds it
936    /// to `slices`, and recurses to parse the next header. No state is stored
937    /// in `self`, so on `NeedData` the recursion simply unwinds — the caller
938    /// retries from scratch, re-parsing the extension chain.
939    fn handle_extension<'a>(
940        &mut self,
941        input: &'a [u8],
942        size: u64,
943        padded_size: u64,
944        kind: ExtensionKind,
945        slices: PendingMetadata<'a>,
946    ) -> Result<ParseEvent<'a>> {
947        // Check for duplicate
948        let has_dup = match kind {
949            ExtensionKind::GnuLongName => slices.gnu_long_name.is_some(),
950            ExtensionKind::GnuLongLink => slices.gnu_long_link.is_some(),
951            ExtensionKind::Pax => slices.pax_extensions.is_some(),
952        };
953        if has_dup {
954            return Err(match kind {
955                ExtensionKind::GnuLongName => ParseError::DuplicateGnuLongName,
956                ExtensionKind::GnuLongLink => ParseError::DuplicateGnuLongLink,
957                ExtensionKind::Pax => ParseError::DuplicatePaxHeader,
958            });
959        }
960
961        // Check aggregate metadata size limit
962        let new_metadata_size = slices.metadata_size + size;
963        if new_metadata_size > self.limits.max_metadata_size as u64 {
964            return Err(ParseError::MetadataTooLarge {
965                size: new_metadata_size,
966                limit: self.limits.max_metadata_size,
967            });
968        }
969
970        let total_size = (HEADER_SIZE as u64)
971            .checked_add(padded_size)
972            .ok_or(ParseError::InvalidSize(size))?;
973        if (input.len() as u64) < total_size {
974            return Ok(ParseEvent::NeedData {
975                min_bytes: total_size as usize,
976            });
977        }
978
979        // Extract content as a borrowed slice (zero-copy)
980        let content_start = HEADER_SIZE;
981        let content_end = content_start + size as usize;
982        let mut data: &'a [u8] = &input[content_start..content_end];
983
984        // Strip trailing null for GNU long name/link
985        if matches!(
986            kind,
987            ExtensionKind::GnuLongName | ExtensionKind::GnuLongLink
988        ) {
989            if let Some(trimmed) = data.strip_suffix(&[0]) {
990                data = trimmed;
991            }
992            self.limits.check_path_len(data.len())?;
993        }
994
995        // Build new pending metadata with the added extension data
996        let mut new_slices = PendingMetadata {
997            count: slices.count + 1,
998            metadata_size: new_metadata_size,
999            ..slices
1000        };
1001        match kind {
1002            ExtensionKind::GnuLongName => new_slices.gnu_long_name = Some(data),
1003            ExtensionKind::GnuLongLink => new_slices.gnu_long_link = Some(data),
1004            ExtensionKind::Pax => new_slices.pax_extensions = Some(data),
1005        }
1006
1007        self.parse_header(&input[total_size as usize..], new_slices)
1008            .map(|e| e.add_consumed(total_size as usize))
1009    }
1010
1011    /// Handle a PAX v1.0 sparse entry.
1012    ///
1013    /// The sparse map is encoded as newline-delimited decimal values at
1014    /// the start of the file's data block:
1015    ///
1016    /// ```text
1017    /// <num_entries>\n
1018    /// <offset_0>\n
1019    /// <length_0>\n
1020    /// ...
1021    /// ```
1022    ///
1023    /// followed by padding to the next 512-byte boundary. This prefix is
1024    /// consumed by the parser and not included in the entry's content.
1025    fn handle_pax_sparse_v1<'a>(
1026        &mut self,
1027        input: &'a [u8],
1028        header: &'a Header,
1029        size: u64,
1030        slices: PendingMetadata<'a>,
1031    ) -> Result<ParseEvent<'a>> {
1032        // Extract sparse metadata from PAX extensions.
1033        let pax = slices
1034            .pax_extensions
1035            .ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1036                "missing PAX extensions",
1037            )))?;
1038
1039        let ignore_errors = self.ignore_pax_errors;
1040        let mut real_size = None;
1041        let mut sparse_name = None;
1042        for ext in PaxExtensions::new(pax) {
1043            let ext = ext?;
1044            let key = match ext.key() {
1045                Ok(k) => k,
1046                Err(_) if ignore_errors => continue,
1047                Err(e) => return Err(ParseError::from(e)),
1048            };
1049            match key {
1050                PAX_GNU_SPARSE_REALSIZE | PAX_GNU_SPARSE_SIZE => {
1051                    let s = match ext.value() {
1052                        Ok(s) => s,
1053                        Err(_) if ignore_errors => continue,
1054                        Err(_) => {
1055                            return Err(ParseError::InvalidPaxValue {
1056                                key: PAX_GNU_SPARSE_REALSIZE,
1057                                value: Cow::Borrowed("<non-UTF-8>"),
1058                            })
1059                        }
1060                    };
1061                    match s.parse::<u64>() {
1062                        Ok(v) => real_size = Some(v),
1063                        Err(_) if ignore_errors => {}
1064                        Err(_) => {
1065                            return Err(ParseError::InvalidPaxValue {
1066                                key: PAX_GNU_SPARSE_REALSIZE,
1067                                value: s.to_owned().into(),
1068                            })
1069                        }
1070                    }
1071                }
1072                PAX_GNU_SPARSE_NAME => {
1073                    sparse_name = Some(ext.value_bytes());
1074                }
1075                _ => {}
1076            }
1077        }
1078
1079        let real_size = real_size.ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1080            "missing GNU.sparse.realsize",
1081        )))?;
1082
1083        // The sparse map data starts right after the header (at offset
1084        // HEADER_SIZE within the input). We need to parse it without
1085        // knowing its exact size upfront — we read line by line.
1086        //
1087        // To remain sans-IO, we scan the available input. If we don't
1088        // have enough, return NeedData.
1089        let data_start = HEADER_SIZE;
1090        let data = &input[data_start..];
1091
1092        // Parse newline-delimited sparse map.
1093        let mut pos = 0usize;
1094
1095        // Helper: read next decimal line from data[pos..]
1096        let read_line = |data: &[u8], pos: &mut usize| -> Option<Result<u64>> {
1097            let remaining = &data[*pos..];
1098            let nl = remaining.iter().position(|&b| b == b'\n')?;
1099            let line = &remaining[..nl];
1100            *pos += nl + 1;
1101            let s = match core::str::from_utf8(line) {
1102                Ok(s) => s,
1103                Err(_) => {
1104                    return Some(Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1105                        "non-UTF8 in sparse map",
1106                    ))))
1107                }
1108            };
1109            match s.parse::<u64>() {
1110                Ok(v) => Some(Ok(v)),
1111                Err(_) => Some(Err(ParseError::InvalidPaxSparseMap(
1112                    format!("invalid decimal: {s:?}").into(),
1113                ))),
1114            }
1115        };
1116
1117        // Read the entry count.
1118        let num_entries = match read_line(data, &mut pos) {
1119            Some(r) => r?,
1120            None => {
1121                // Need more data — we need at least enough to see the
1122                // first newline. Request a generous amount.
1123                return Ok(ParseEvent::NeedData {
1124                    min_bytes: data_start + pos + HEADER_SIZE,
1125                });
1126            }
1127        };
1128
1129        if num_entries as usize > self.limits.max_sparse_entries {
1130            return Err(ParseError::TooManySparseEntries {
1131                count: num_entries as usize,
1132                limit: self.limits.max_sparse_entries,
1133            });
1134        }
1135
1136        // Cap pre-allocation to avoid trusting the claimed count for memory.
1137        // The actual loop below will still process exactly num_entries items.
1138        let mut sparse_map = Vec::with_capacity((num_entries as usize).min(1024));
1139        for _ in 0..num_entries {
1140            let offset = match read_line(data, &mut pos) {
1141                Some(r) => r?,
1142                None => {
1143                    return Ok(ParseEvent::NeedData {
1144                        min_bytes: data_start + pos + HEADER_SIZE,
1145                    });
1146                }
1147            };
1148            let length = match read_line(data, &mut pos) {
1149                Some(r) => r?,
1150                None => {
1151                    return Ok(ParseEvent::NeedData {
1152                        min_bytes: data_start + pos + HEADER_SIZE,
1153                    });
1154                }
1155            };
1156            sparse_map.push(SparseEntry { offset, length });
1157        }
1158
1159        // The sparse map data is padded to a 512-byte boundary.
1160        let map_size = pos.next_multiple_of(HEADER_SIZE);
1161
1162        // Verify we have enough input for the padded map.
1163        if data.len() < map_size {
1164            return Ok(ParseEvent::NeedData {
1165                min_bytes: data_start + map_size,
1166            });
1167        }
1168
1169        // The remaining content size is the original size minus the
1170        // sparse map prefix (including padding).
1171        let content_size =
1172            size.checked_sub(map_size as u64)
1173                .ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1174                    "sparse map prefix larger than entry size",
1175                )))?;
1176
1177        let sparse_ctx = SparseContext {
1178            sparse_map,
1179            real_size,
1180            // Extension consumed = the sparse map data prefix.
1181            ext_consumed: map_size,
1182        };
1183
1184        // Override the path with GNU.sparse.name if present by
1185        // stashing it in the slices so emit_entry picks it up.
1186        let slices = if let Some(name) = sparse_name {
1187            PendingMetadata {
1188                gnu_long_name: Some(name),
1189                ..slices
1190            }
1191        } else {
1192            slices
1193        };
1194
1195        self.emit_entry(header, content_size, Some(sparse_ctx), slices)
1196    }
1197
1198    /// Handle a GNU sparse entry (type 'S').
1199    ///
1200    /// Reads the inline sparse descriptors from the GNU header and any
1201    /// extension blocks that follow. Returns NeedData if the extension
1202    /// blocks aren't fully available yet (side-effect-free: no state is
1203    /// mutated before we know we have enough data).
1204    fn handle_gnu_sparse<'a>(
1205        &mut self,
1206        input: &'a [u8],
1207        header: &'a Header,
1208        size: u64,
1209        slices: PendingMetadata<'a>,
1210    ) -> Result<ParseEvent<'a>> {
1211        let gnu = header.try_as_gnu().ok_or(ParseError::SparseNotGnu)?;
1212        let real_size = gnu.real_size()?;
1213
1214        // Collect sparse entries from the 4 inline descriptors.
1215        let mut sparse_map = Vec::new();
1216        for desc in &gnu.sparse {
1217            if desc.is_empty() {
1218                break;
1219            }
1220            let entry = desc.to_sparse_entry()?;
1221            sparse_map.push(entry);
1222        }
1223
1224        // If there are extension blocks, we need to read them all.
1225        // Each extension block is 512 bytes and may chain to the next.
1226        // We must not mutate any state before we know we have enough input,
1227        // so we scan forward to find all extension blocks first.
1228        let mut ext_consumed = 0usize;
1229        if gnu.is_extended() {
1230            let mut offset = HEADER_SIZE; // start past the main header
1231            loop {
1232                if input.len() < offset + HEADER_SIZE {
1233                    return Ok(ParseEvent::NeedData {
1234                        min_bytes: offset + HEADER_SIZE,
1235                    });
1236                }
1237
1238                let ext_bytes: &[u8; HEADER_SIZE] = input[offset..offset + HEADER_SIZE]
1239                    .try_into()
1240                    .expect("checked length");
1241                let ext = GnuExtSparseHeader::ref_from_bytes(ext_bytes)
1242                    .expect("GnuExtSparseHeader is 512 bytes");
1243
1244                for desc in &ext.sparse {
1245                    if desc.is_empty() {
1246                        break;
1247                    }
1248                    if sparse_map.len() >= self.limits.max_sparse_entries {
1249                        return Err(ParseError::TooManySparseEntries {
1250                            count: sparse_map.len() + 1,
1251                            limit: self.limits.max_sparse_entries,
1252                        });
1253                    }
1254                    let entry = desc.to_sparse_entry()?;
1255                    sparse_map.push(entry);
1256                }
1257
1258                offset += HEADER_SIZE;
1259
1260                if !ext.is_extended() {
1261                    break;
1262                }
1263            }
1264            ext_consumed = offset - HEADER_SIZE; // bytes consumed by extension blocks
1265        }
1266
1267        // Also check the inline descriptors against the limit.
1268        if sparse_map.len() > self.limits.max_sparse_entries {
1269            return Err(ParseError::TooManySparseEntries {
1270                count: sparse_map.len(),
1271                limit: self.limits.max_sparse_entries,
1272            });
1273        }
1274
1275        let sparse_ctx = SparseContext {
1276            sparse_map,
1277            real_size,
1278            ext_consumed,
1279        };
1280
1281        self.emit_entry(header, size, Some(sparse_ctx), slices)
1282    }
1283
1284    fn emit_entry<'a>(
1285        &mut self,
1286        header: &'a Header,
1287        size: u64,
1288        sparse: Option<SparseContext>,
1289        slices: PendingMetadata<'a>,
1290    ) -> Result<ParseEvent<'a>> {
1291        // Start with header values
1292        let mut path: Cow<'a, [u8]> = Cow::Borrowed(header.path_bytes());
1293        let mut link_target: Option<Cow<'a, [u8]>> = None;
1294        let mut uid = header.uid()?;
1295        let mut gid = header.gid()?;
1296        let mut mtime = header.mtime()?;
1297        let mut entry_size = size;
1298        let mut xattrs = Vec::new();
1299        let mut uname: Option<Cow<'a, [u8]>> = header
1300            .username()
1301            .filter(|b| !b.is_empty())
1302            .map(Cow::Borrowed);
1303        let mut gname: Option<Cow<'a, [u8]>> = header
1304            .groupname()
1305            .filter(|b| !b.is_empty())
1306            .map(Cow::Borrowed);
1307
1308        // Handle UStar prefix for path
1309        if let Some(prefix) = header.prefix() {
1310            if !prefix.is_empty() {
1311                let mut full_path = prefix.to_vec();
1312                full_path.push(b'/');
1313                full_path.extend_from_slice(header.path_bytes());
1314                path = Cow::Owned(full_path);
1315            }
1316        }
1317
1318        // Apply GNU long name (overrides header + prefix)
1319        if let Some(long_name) = slices.gnu_long_name {
1320            path = Cow::Borrowed(long_name);
1321        }
1322
1323        // Apply GNU long link
1324        if let Some(long_link) = slices.gnu_long_link {
1325            link_target = Some(Cow::Borrowed(long_link));
1326        } else {
1327            let header_link = header.link_name_bytes();
1328            if !header_link.is_empty() {
1329                link_target = Some(Cow::Borrowed(header_link));
1330            }
1331        }
1332
1333        // Apply PAX extensions (highest priority)
1334        let raw_pax = slices.pax_extensions;
1335
1336        // PAX sparse v0.0/v0.1 tracking. v0.0 uses repeated offset/numbytes
1337        // pairs; v0.1 uses a single comma-separated map string.
1338        let mut pax_sparse_map: Option<Vec<SparseEntry>> = None;
1339        let mut pax_sparse_real_size: Option<u64> = None;
1340        let mut pax_sparse_name: Option<&'a [u8]> = None;
1341        // v0.0: current offset waiting for its numbytes pair
1342        let mut pax_sparse_pending_offset: Option<u64> = None;
1343
1344        if let Some(pax) = raw_pax {
1345            let ignore_errors = self.ignore_pax_errors;
1346            let extensions = PaxExtensions::new(pax);
1347
1348            // Helper: parse a PAX numeric value, returning Ok(None) when
1349            // ignore_pax_errors is set and the value is unparseable.
1350            let parse_pax_u64 =
1351                |ext: &crate::PaxExtension<'_>, key: &'static str| -> Result<Option<u64>> {
1352                    let s = match ext.value() {
1353                        Ok(s) => s,
1354                        Err(_) if ignore_errors => return Ok(None),
1355                        Err(_) => {
1356                            return Err(ParseError::InvalidPaxValue {
1357                                key,
1358                                value: Cow::Borrowed("<non-UTF-8>"),
1359                            })
1360                        }
1361                    };
1362                    match s.parse::<u64>() {
1363                        Ok(v) => Ok(Some(v)),
1364                        Err(_) if ignore_errors => Ok(None),
1365                        Err(_) => Err(ParseError::InvalidPaxValue {
1366                            key,
1367                            value: s.to_owned().into(),
1368                        }),
1369                    }
1370                };
1371
1372            for ext in extensions {
1373                let ext = ext?;
1374                let key = ext.key().map_err(ParseError::from)?;
1375                let value = ext.value_bytes();
1376
1377                match key {
1378                    PAX_PATH => {
1379                        self.limits.check_path_len(value.len())?;
1380                        path = Cow::Borrowed(value);
1381                    }
1382                    PAX_LINKPATH => {
1383                        self.limits.check_path_len(value.len())?;
1384                        link_target = Some(Cow::Borrowed(value));
1385                    }
1386                    PAX_SIZE => {
1387                        if let Some(v) = parse_pax_u64(&ext, PAX_SIZE)? {
1388                            entry_size = v;
1389                        }
1390                    }
1391                    PAX_UID => {
1392                        if let Some(v) = parse_pax_u64(&ext, PAX_UID)? {
1393                            uid = v;
1394                        }
1395                    }
1396                    PAX_GID => {
1397                        if let Some(v) = parse_pax_u64(&ext, PAX_GID)? {
1398                            gid = v;
1399                        }
1400                    }
1401                    PAX_MTIME => {
1402                        // mtime may have fractional seconds (e.g. "1234567890.5");
1403                        // parse only the integer part.
1404                        let s = match ext.value() {
1405                            Ok(s) => s,
1406                            Err(_) if ignore_errors => continue,
1407                            Err(_) => {
1408                                return Err(ParseError::InvalidPaxValue {
1409                                    key: PAX_MTIME,
1410                                    value: Cow::Borrowed("<non-UTF-8>"),
1411                                })
1412                            }
1413                        };
1414                        let int_part = s.split('.').next().unwrap_or(s);
1415                        match int_part.parse::<u64>() {
1416                            Ok(v) => mtime = v,
1417                            Err(_) if ignore_errors => {}
1418                            Err(_) => {
1419                                return Err(ParseError::InvalidPaxValue {
1420                                    key: PAX_MTIME,
1421                                    value: s.to_owned().into(),
1422                                })
1423                            }
1424                        }
1425                    }
1426                    PAX_UNAME => {
1427                        uname = Some(Cow::Borrowed(value));
1428                    }
1429                    PAX_GNAME => {
1430                        gname = Some(Cow::Borrowed(value));
1431                    }
1432
1433                    // PAX sparse v0.0: repeated offset/numbytes pairs
1434                    PAX_GNU_SPARSE_OFFSET => {
1435                        let v = parse_pax_u64(&ext, PAX_GNU_SPARSE_OFFSET)?;
1436                        pax_sparse_pending_offset = v;
1437                    }
1438                    PAX_GNU_SPARSE_NUMBYTES => {
1439                        if let (Some(offset), Some(length)) = (
1440                            pax_sparse_pending_offset.take(),
1441                            parse_pax_u64(&ext, PAX_GNU_SPARSE_NUMBYTES)?,
1442                        ) {
1443                            let map = pax_sparse_map.get_or_insert_with(Vec::new);
1444                            if map.len() >= self.limits.max_sparse_entries {
1445                                return Err(ParseError::TooManySparseEntries {
1446                                    count: map.len() + 1,
1447                                    limit: self.limits.max_sparse_entries,
1448                                });
1449                            }
1450                            map.push(SparseEntry { offset, length });
1451                        }
1452                    }
1453
1454                    // PAX sparse v0.1: comma-separated map
1455                    PAX_GNU_SPARSE_MAP => {
1456                        let s = match ext.value() {
1457                            Ok(s) => s,
1458                            Err(_) if ignore_errors => continue,
1459                            Err(_) => {
1460                                return Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1461                                    "non-UTF8 sparse map",
1462                                )))
1463                            }
1464                        };
1465                        let mut map = Vec::new();
1466                        let parts: Vec<&str> = s.split(',').filter(|p| !p.is_empty()).collect();
1467                        if parts.len() % 2 != 0 {
1468                            return Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1469                                "odd number of values in GNU.sparse.map",
1470                            )));
1471                        }
1472                        for pair in parts.chunks(2) {
1473                            if map.len() >= self.limits.max_sparse_entries {
1474                                return Err(ParseError::TooManySparseEntries {
1475                                    count: map.len() + 1,
1476                                    limit: self.limits.max_sparse_entries,
1477                                });
1478                            }
1479                            let offset = pair[0].parse::<u64>().map_err(|_| {
1480                                ParseError::InvalidPaxSparseMap(
1481                                    format!("invalid offset: {:?}", pair[0]).into(),
1482                                )
1483                            })?;
1484                            let length = pair[1].parse::<u64>().map_err(|_| {
1485                                ParseError::InvalidPaxSparseMap(
1486                                    format!("invalid length: {:?}", pair[1]).into(),
1487                                )
1488                            })?;
1489                            map.push(SparseEntry { offset, length });
1490                        }
1491                        pax_sparse_map = Some(map);
1492                    }
1493
1494                    // PAX sparse: real size and name (shared across versions)
1495                    PAX_GNU_SPARSE_REALSIZE | PAX_GNU_SPARSE_SIZE => {
1496                        if let Some(v) = parse_pax_u64(&ext, PAX_GNU_SPARSE_REALSIZE)? {
1497                            pax_sparse_real_size = Some(v);
1498                        }
1499                    }
1500                    PAX_GNU_SPARSE_NAME => {
1501                        self.limits.check_path_len(value.len())?;
1502                        pax_sparse_name = Some(value);
1503                    }
1504
1505                    // Skip version fields — already handled in
1506                    // pending_pax_sparse_version() for v1.0 routing.
1507                    PAX_GNU_SPARSE_MAJOR | PAX_GNU_SPARSE_MINOR => {}
1508
1509                    _ => {
1510                        if let Some(attr_name) = key.strip_prefix(PAX_SCHILY_XATTR) {
1511                            xattrs
1512                                .push((Cow::Borrowed(attr_name.as_bytes()), Cow::Borrowed(value)));
1513                        }
1514                    }
1515                }
1516            }
1517        }
1518
1519        // Apply PAX sparse name override (highest priority for path).
1520        if let Some(name) = pax_sparse_name {
1521            path = Cow::Borrowed(name);
1522        }
1523
1524        // Normalize: empty optional byte fields are semantically equivalent to
1525        // absent.  PAX overrides and GNU long link can set empty values that
1526        // would otherwise surface as `Some([])` instead of `None`.
1527        if link_target.as_ref().is_some_and(|v| v.is_empty()) {
1528            link_target = None;
1529        }
1530        if uname.as_ref().is_some_and(|v| v.is_empty()) {
1531            uname = None;
1532        }
1533        if gname.as_ref().is_some_and(|v| v.is_empty()) {
1534            gname = None;
1535        }
1536
1537        // Reject entries with empty paths
1538        if path.is_empty() && !self.allow_empty_path {
1539            return Err(ParseError::EmptyPath);
1540        }
1541
1542        // Validate final path length
1543        self.limits.check_path_len(path.len())?;
1544
1545        let entry = ParsedEntry {
1546            header,
1547            entry_type: header.entry_type(),
1548            path,
1549            link_target,
1550            mode: header.mode()?,
1551            uid,
1552            gid,
1553            mtime,
1554            size: entry_size,
1555            uname,
1556            gname,
1557            dev_major: header.device_major()?,
1558            dev_minor: header.device_minor()?,
1559            xattrs,
1560            pax: raw_pax,
1561        };
1562
1563        // Determine the sparse context. Priority:
1564        // 1. Explicit sparse context (from GNU sparse type 'S' or PAX v1.0)
1565        // 2. PAX sparse v0.0/v0.1 data collected during PAX processing
1566        let sparse = sparse.or_else(|| {
1567            pax_sparse_map.map(|map| SparseContext {
1568                sparse_map: map,
1569                real_size: pax_sparse_real_size.unwrap_or(entry_size),
1570                ext_consumed: 0, // PAX v0.0/v0.1 has no extra blocks
1571            })
1572        });
1573
1574        if let Some(ctx) = sparse {
1575            // Consume the main header plus any extension blocks.
1576            Ok(ParseEvent::SparseEntry {
1577                consumed: HEADER_SIZE + ctx.ext_consumed,
1578                entry,
1579                sparse_map: ctx.sparse_map,
1580                real_size: ctx.real_size,
1581            })
1582        } else {
1583            // Only consume the header - content is left for caller
1584            Ok(ParseEvent::Entry {
1585                consumed: HEADER_SIZE,
1586                entry,
1587            })
1588        }
1589    }
1590}
1591
1592#[cfg(test)]
1593mod tests {
1594    use super::*;
1595    use crate::{GNU_MAGIC, GNU_VERSION, USTAR_MAGIC, USTAR_VERSION};
1596
1597    #[test]
1598    fn test_default_limits() {
1599        let limits = Limits::default();
1600        assert_eq!(limits.max_metadata_size, 1024 * 1024);
1601        assert_eq!(limits.max_path_len, None);
1602        assert_eq!(limits.max_pending_entries, 16);
1603    }
1604
1605    #[test]
1606    fn test_permissive_limits() {
1607        let limits = Limits::permissive();
1608        assert_eq!(limits.max_metadata_size, u32::MAX);
1609        assert_eq!(limits.max_path_len, None);
1610    }
1611
1612    #[test]
1613    fn test_permissive_limits_relaxed() {
1614        let limits = Limits::permissive();
1615        assert!(limits.max_metadata_size > Limits::default().max_metadata_size);
1616        assert!(limits.max_pending_entries > Limits::default().max_pending_entries);
1617    }
1618
1619    #[test]
1620    fn test_parser_empty_archive() {
1621        let mut parser = Parser::new(Limits::default());
1622
1623        // Two zero blocks = end of archive
1624        let data = [0u8; 1024];
1625
1626        let event = parser.parse(&data).unwrap();
1627        assert!(matches!(event, ParseEvent::End { consumed: 1024 }));
1628        assert!(parser.is_done());
1629    }
1630
1631    #[test]
1632    fn test_parser_need_data() {
1633        let mut parser = Parser::new(Limits::default());
1634
1635        // Not enough data for a header
1636        let data = [0u8; 256];
1637
1638        let event = parser.parse(&data).unwrap();
1639        assert!(matches!(event, ParseEvent::NeedData { min_bytes: 512 }));
1640    }
1641
1642    #[test]
1643    fn test_parser_need_more_for_end() {
1644        let mut parser = Parser::new(Limits::default());
1645
1646        // One zero block - need second to confirm end
1647        let data = [0u8; 512];
1648
1649        let event = parser.parse(&data).unwrap();
1650        assert!(matches!(event, ParseEvent::NeedData { min_bytes: 1024 }));
1651    }
1652
1653    #[test]
1654    fn test_parser_with_real_header() {
1655        let mut parser = Parser::new(Limits::default());
1656
1657        // Create a minimal valid tar header
1658        let mut data = vec![0u8; 2048];
1659
1660        // Set up header at offset 0
1661        // name: "test.txt"
1662        data[0..8].copy_from_slice(b"test.txt");
1663        // mode: 0000644
1664        data[100..107].copy_from_slice(b"0000644");
1665        // uid: 0
1666        data[108..115].copy_from_slice(b"0000000");
1667        // gid: 0
1668        data[116..123].copy_from_slice(b"0000000");
1669        // size: 0 (empty file)
1670        data[124..135].copy_from_slice(b"00000000000");
1671        // mtime: 0
1672        data[136..147].copy_from_slice(b"00000000000");
1673        // typeflag: '0' (regular file)
1674        data[156] = b'0';
1675        // magic: "ustar\0"
1676        data[257..263].copy_from_slice(USTAR_MAGIC);
1677        // version: "00"
1678        data[263..265].copy_from_slice(USTAR_VERSION);
1679
1680        // Compute and set checksum
1681        let header = Header::from_bytes((&data[..512]).try_into().unwrap());
1682        let checksum = header.compute_checksum();
1683        let checksum_str = format!("{checksum:06o}\0 ");
1684        data[148..156].copy_from_slice(checksum_str.as_bytes());
1685
1686        // Two zero blocks at the end
1687        // data[512..1536] is already zeros
1688
1689        let event = parser.parse(&data).unwrap();
1690        match event {
1691            ParseEvent::Entry { consumed, entry } => {
1692                assert_eq!(consumed, 512);
1693                assert_eq!(entry.path_lossy(), "test.txt");
1694                assert_eq!(entry.size, 0);
1695                assert!(entry.is_file());
1696            }
1697            other => panic!("Expected Entry, got {:?}", other),
1698        }
1699
1700        // Now parse end
1701        let event = parser.parse(&data[512..]).unwrap();
1702        assert!(matches!(event, ParseEvent::End { consumed: 1024 }));
1703    }
1704
1705    #[test]
1706    fn test_parser_entry_with_content() {
1707        let mut parser = Parser::new(Limits::default());
1708
1709        // Create a tar with a file containing "hello"
1710        let mut data = vec![0u8; 2560]; // header + content block + 2 zero blocks
1711
1712        // Header
1713        data[0..8].copy_from_slice(b"test.txt");
1714        data[100..107].copy_from_slice(b"0000644");
1715        data[108..115].copy_from_slice(b"0000000");
1716        data[116..123].copy_from_slice(b"0000000");
1717        data[124..135].copy_from_slice(b"00000000005"); // size = 5
1718        data[136..147].copy_from_slice(b"00000000000");
1719        data[156] = b'0';
1720        data[257..263].copy_from_slice(USTAR_MAGIC);
1721        data[263..265].copy_from_slice(USTAR_VERSION);
1722
1723        // Checksum
1724        let header = Header::from_bytes((&data[..512]).try_into().unwrap());
1725        let checksum = header.compute_checksum();
1726        let checksum_str = format!("{checksum:06o}\0 ");
1727        data[148..156].copy_from_slice(checksum_str.as_bytes());
1728
1729        // Content at 512..517
1730        data[512..517].copy_from_slice(b"hello");
1731
1732        let event = parser.parse(&data).unwrap();
1733        match event {
1734            ParseEvent::Entry { consumed, entry } => {
1735                assert_eq!(consumed, 512);
1736                assert_eq!(entry.path_lossy(), "test.txt");
1737                assert_eq!(entry.size, 5);
1738                assert_eq!(entry.padded_size(), 512);
1739            }
1740            other => panic!("Expected Entry, got {:?}", other),
1741        }
1742
1743        // Content at data[512..517], padded to 512.
1744        // Caller skips past content + padding, then parses the next header.
1745
1746        // Parse end (zero blocks at 1024..2048)
1747        let event = parser.parse(&data[1024..]).unwrap();
1748        assert!(matches!(event, ParseEvent::End { consumed: 1024 }));
1749    }
1750
1751    // =========================================================================
1752    // Helper functions for building test tar archives
1753    // =========================================================================
1754
1755    /// Create a valid tar header with computed checksum.
1756    ///
1757    /// # Arguments
1758    /// * `name` - File name (max 100 bytes)
1759    /// * `size` - Content size in bytes
1760    /// * `typeflag` - Entry type (b'0' for regular, b'L' for GNU long name, etc.)
1761    fn make_header(name: &[u8], size: u64, typeflag: u8) -> [u8; HEADER_SIZE] {
1762        let mut header = [0u8; HEADER_SIZE];
1763
1764        // name (0..100)
1765        let name_len = name.len().min(100);
1766        header[0..name_len].copy_from_slice(&name[..name_len]);
1767
1768        // mode (100..108): 0000644
1769        header[100..107].copy_from_slice(b"0000644");
1770
1771        // uid (108..116): 0001750 (1000 in octal)
1772        header[108..115].copy_from_slice(b"0001750");
1773
1774        // gid (116..124): 0001750 (1000 in octal)
1775        header[116..123].copy_from_slice(b"0001750");
1776
1777        // size (124..136): 11-digit octal
1778        let size_str = format!("{size:011o}");
1779        header[124..135].copy_from_slice(size_str.as_bytes());
1780
1781        // mtime (136..148): arbitrary timestamp
1782        header[136..147].copy_from_slice(b"14712345670");
1783
1784        // typeflag (156)
1785        header[156] = typeflag;
1786
1787        // magic (257..263): "ustar\0"
1788        header[257..263].copy_from_slice(USTAR_MAGIC);
1789
1790        // version (263..265): "00"
1791        header[263..265].copy_from_slice(USTAR_VERSION);
1792
1793        // Compute and set checksum
1794        let hdr = Header::from_bytes(&header);
1795        let checksum = hdr.compute_checksum();
1796        let checksum_str = format!("{checksum:06o}\0 ");
1797        header[148..156].copy_from_slice(checksum_str.as_bytes());
1798
1799        header
1800    }
1801
1802    /// Create a tar header with a link target (for symlinks/hardlinks).
1803    fn make_link_header(name: &[u8], link_target: &[u8], typeflag: u8) -> [u8; HEADER_SIZE] {
1804        let mut header = make_header(name, 0, typeflag);
1805
1806        // linkname (157..257)
1807        let link_len = link_target.len().min(100);
1808        header[157..157 + link_len].copy_from_slice(&link_target[..link_len]);
1809
1810        // Recompute checksum
1811        let hdr = Header::from_bytes(&header);
1812        let checksum = hdr.compute_checksum();
1813        let checksum_str = format!("{checksum:06o}\0 ");
1814        header[148..156].copy_from_slice(checksum_str.as_bytes());
1815
1816        header
1817    }
1818
1819    /// Create a GNU long name entry (type 'L') with the given long name.
1820    ///
1821    /// Returns the complete entry: header + padded content.
1822    fn make_gnu_long_name(name: &[u8]) -> Vec<u8> {
1823        // GNU long name: content is the name with a trailing null
1824        let content_size = name.len() + 1; // +1 for null terminator
1825        let padded = content_size.next_multiple_of(HEADER_SIZE);
1826        let header = make_header(b"././@LongLink", content_size as u64, b'L');
1827
1828        let mut result = Vec::with_capacity(HEADER_SIZE + padded);
1829        result.extend_from_slice(&header);
1830        result.extend_from_slice(name);
1831        result.push(0); // null terminator
1832        result.extend(zeroes(padded - content_size));
1833
1834        result
1835    }
1836
1837    /// Create a GNU long link entry (type 'K') with the given long link target.
1838    ///
1839    /// Returns the complete entry: header + padded content.
1840    fn make_gnu_long_link(link: &[u8]) -> Vec<u8> {
1841        let content_size = link.len() + 1; // +1 for null terminator
1842        let padded = content_size.next_multiple_of(HEADER_SIZE);
1843        let header = make_header(b"././@LongLink", content_size as u64, b'K');
1844
1845        let mut result = Vec::with_capacity(HEADER_SIZE + padded);
1846        result.extend_from_slice(&header);
1847        result.extend_from_slice(link);
1848        result.push(0); // null terminator
1849        result.extend(zeroes(padded - content_size));
1850
1851        result
1852    }
1853
1854    /// Build a PAX-style header (local 'x' or global 'g') with the given key-value pairs.
1855    fn make_pax_entry(name: &[u8], type_flag: u8, entries: &[(&str, &[u8])]) -> Vec<u8> {
1856        use crate::builder::DecU64;
1857
1858        // Build PAX content: each record is "<length> <key>=<value>\n"
1859        let mut content = Vec::new();
1860        for (key, value) in entries {
1861            // rest_len covers: " " + key + "=" + value + "\n"
1862            let rest_len = 3 + key.len() + value.len();
1863            let mut len_len = 1;
1864            let mut max_len = 10;
1865            while rest_len + len_len >= max_len {
1866                len_len += 1;
1867                max_len *= 10;
1868            }
1869            let total_len = rest_len + len_len;
1870            let len_dec = DecU64::new(total_len as u64);
1871            content.extend_from_slice(len_dec.as_bytes());
1872            content.push(b' ');
1873            content.extend_from_slice(key.as_bytes());
1874            content.push(b'=');
1875            content.extend_from_slice(value);
1876            content.push(b'\n');
1877        }
1878
1879        let content_size = content.len();
1880        let header = make_header(name, content_size as u64, type_flag);
1881
1882        let padded = content_size.next_multiple_of(HEADER_SIZE);
1883        let mut result = Vec::with_capacity(HEADER_SIZE + padded);
1884        result.extend_from_slice(&header);
1885        result.extend_from_slice(&content);
1886        result.extend(zeroes(padded - content_size));
1887
1888        result
1889    }
1890
1891    fn make_pax_header(entries: &[(&str, &[u8])]) -> Vec<u8> {
1892        make_pax_entry(b"PaxHeader/file", b'x', entries)
1893    }
1894
1895    fn make_pax_global_header(entries: &[(&str, &[u8])]) -> Vec<u8> {
1896        make_pax_entry(b"pax_global_header", b'g', entries)
1897    }
1898
1899    /// Return `n` zero bytes (for end-of-archive markers, padding, etc.).
1900    fn zeroes(n: usize) -> impl Iterator<Item = u8> {
1901        std::iter::repeat_n(0u8, n)
1902    }
1903
1904    // =========================================================================
1905    // GNU long name tests
1906    // =========================================================================
1907
1908    #[test]
1909    fn test_parser_gnu_long_name() {
1910        // Create archive with GNU long name entry followed by actual file
1911        let long_name =
1912            "very/long/path/that/exceeds/one/hundred/bytes/".to_string() + &"x".repeat(60);
1913        assert!(long_name.len() > 100);
1914
1915        let mut archive = Vec::new();
1916        archive.extend(make_gnu_long_name(long_name.as_bytes()));
1917        archive.extend_from_slice(&make_header(b"placeholder", 5, b'0'));
1918        // Content: "hello"
1919        let mut content_block = [0u8; 512];
1920        content_block[0..5].copy_from_slice(b"hello");
1921        archive.extend_from_slice(&content_block);
1922        archive.extend(zeroes(1024));
1923
1924        let mut parser = Parser::new(Limits::default());
1925        let event = parser.parse(&archive).unwrap();
1926
1927        // Should consume GNU long name header + content + actual header
1928        let consumed = match &event {
1929            ParseEvent::Entry { consumed, entry } => {
1930                assert!(*consumed > 512);
1931                assert_eq!(entry.path.as_ref(), long_name.as_bytes());
1932                assert_eq!(entry.size, 5);
1933                assert!(entry.is_file());
1934                *consumed
1935            }
1936            other => panic!("Expected Entry, got {:?}", other),
1937        };
1938
1939        // Parse end (skip past content + padding)
1940        let remaining = &archive[consumed + 512..];
1941        let event = parser.parse(remaining).unwrap();
1942        assert!(matches!(event, ParseEvent::End { .. }));
1943    }
1944
1945    // =========================================================================
1946    // GNU long link tests
1947    // =========================================================================
1948
1949    #[test]
1950    fn test_parser_gnu_long_link() {
1951        // Create archive with GNU long link entry followed by symlink
1952        let long_target = "/some/very/long/symlink/target/path/".to_string() + &"t".repeat(80);
1953        assert!(long_target.len() > 100);
1954
1955        let mut archive = Vec::new();
1956        archive.extend(make_gnu_long_link(long_target.as_bytes()));
1957        // Symlink header with placeholder linkname
1958        archive.extend_from_slice(&make_link_header(b"mylink", b"placeholder", b'2'));
1959        archive.extend(zeroes(1024));
1960
1961        let mut parser = Parser::new(Limits::default());
1962        let event = parser.parse(&archive).unwrap();
1963
1964        let consumed = match &event {
1965            ParseEvent::Entry { consumed, entry } => {
1966                assert_eq!(entry.path.as_ref(), b"mylink");
1967                assert!(entry.is_symlink());
1968                assert_eq!(
1969                    entry.link_target.as_ref().unwrap().as_ref(),
1970                    long_target.as_bytes()
1971                );
1972                *consumed
1973            }
1974            other => panic!("Expected Entry, got {:?}", other),
1975        };
1976
1977        let remaining = &archive[consumed..];
1978        let event = parser.parse(remaining).unwrap();
1979        assert!(matches!(event, ParseEvent::End { .. }));
1980    }
1981
1982    // =========================================================================
1983    // PAX extension tests
1984    // =========================================================================
1985
1986    #[test]
1987    fn test_parser_pax_path_override() {
1988        // PAX header should override the path in the actual header
1989        let pax_path = "pax/overridden/path/to/file.txt";
1990
1991        let mut archive = Vec::new();
1992        archive.extend(make_pax_header(&[("path", pax_path.as_bytes())]));
1993        archive.extend_from_slice(&make_header(b"original.txt", 0, b'0'));
1994        archive.extend(zeroes(1024));
1995
1996        let mut parser = Parser::new(Limits::default());
1997        let event = parser.parse(&archive).unwrap();
1998
1999        match event {
2000            ParseEvent::Entry { entry, .. } => {
2001                assert_eq!(entry.path.as_ref(), pax_path.as_bytes());
2002            }
2003            other => panic!("Expected Entry, got {:?}", other),
2004        }
2005    }
2006
2007    #[test]
2008    fn test_parser_pax_size_override() {
2009        // PAX header should override the size in the actual header
2010        let mut archive = Vec::new();
2011        archive.extend(make_pax_header(&[("size", b"999")]));
2012        // Header says size=5, but PAX says 999
2013        archive.extend_from_slice(&make_header(b"file.txt", 5, b'0'));
2014        // We still need content padded to the PAX size for proper parsing
2015        archive.extend(zeroes(1024)); // More than enough
2016
2017        let mut parser = Parser::new(Limits::default());
2018        let event = parser.parse(&archive).unwrap();
2019
2020        match event {
2021            ParseEvent::Entry { entry, .. } => {
2022                assert_eq!(entry.size, 999);
2023            }
2024            other => panic!("Expected Entry, got {:?}", other),
2025        }
2026    }
2027
2028    #[test]
2029    fn test_parser_pax_metadata() {
2030        // PAX header overriding uid, gid, and mtime
2031        let mut archive = Vec::new();
2032        archive.extend(make_pax_header(&[
2033            ("uid", b"65534"),
2034            ("gid", b"65535"),
2035            ("mtime", b"1700000000.123456789"),
2036        ]));
2037        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2038        archive.extend(zeroes(1024));
2039
2040        let mut parser = Parser::new(Limits::default());
2041        let event = parser.parse(&archive).unwrap();
2042
2043        match event {
2044            ParseEvent::Entry { entry, .. } => {
2045                assert_eq!(entry.uid, 65534);
2046                assert_eq!(entry.gid, 65535);
2047                // mtime should be the integer part only
2048                assert_eq!(entry.mtime, 1700000000);
2049            }
2050            other => panic!("Expected Entry, got {:?}", other),
2051        }
2052    }
2053
2054    #[test]
2055    fn test_parser_pax_xattr() {
2056        // PAX SCHILY.xattr.* entries for extended attributes
2057        let mut archive = Vec::new();
2058        archive.extend(make_pax_header(&[
2059            ("SCHILY.xattr.user.test", b"test_value"),
2060            (
2061                "SCHILY.xattr.security.selinux",
2062                b"system_u:object_r:unlabeled_t:s0",
2063            ),
2064        ]));
2065        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2066        archive.extend(zeroes(1024));
2067
2068        let mut parser = Parser::new(Limits::default());
2069        let event = parser.parse(&archive).unwrap();
2070
2071        match event {
2072            ParseEvent::Entry { entry, .. } => {
2073                assert_eq!(entry.xattrs.len(), 2);
2074
2075                // Check xattrs (order should be preserved)
2076                assert_eq!(entry.xattrs[0].0.as_ref(), b"user.test");
2077                assert_eq!(entry.xattrs[0].1.as_ref(), b"test_value");
2078
2079                assert_eq!(entry.xattrs[1].0.as_ref(), b"security.selinux");
2080                assert_eq!(
2081                    entry.xattrs[1].1.as_ref(),
2082                    b"system_u:object_r:unlabeled_t:s0"
2083                );
2084            }
2085            other => panic!("Expected Entry, got {:?}", other),
2086        }
2087    }
2088
2089    #[test]
2090    fn test_parser_pax_raw_bytes_preserved() {
2091        // The raw PAX data should be available in ParsedEntry::pax
2092        // so callers can iterate all key-value pairs, not just the ones
2093        // tar-core resolves into struct fields.
2094        let mut archive = Vec::new();
2095        archive.extend(make_pax_header(&[
2096            ("path", b"custom/path.txt"),
2097            ("SCHILY.xattr.user.key", b"val"),
2098            ("myfancykey", b"myfancyvalue"),
2099        ]));
2100        archive.extend_from_slice(&make_header(b"orig.txt", 0, b'0'));
2101        archive.extend(zeroes(1024));
2102
2103        let mut parser = Parser::new(Limits::default());
2104        let event = parser.parse(&archive).unwrap();
2105
2106        match event {
2107            ParseEvent::Entry { entry, .. } => {
2108                // Resolved fields work as before
2109                assert_eq!(entry.path.as_ref(), b"custom/path.txt");
2110                assert_eq!(entry.xattrs.len(), 1);
2111
2112                // Raw PAX data is preserved
2113                let raw = entry.pax.expect("pax should be Some");
2114                let exts = PaxExtensions::new(raw);
2115                let keys: Vec<&str> = exts
2116                    .filter_map(|e| e.ok())
2117                    .filter_map(|e| e.key().ok())
2118                    .collect();
2119                assert_eq!(keys, &["path", "SCHILY.xattr.user.key", "myfancykey"]);
2120            }
2121            other => panic!("Expected Entry, got {:?}", other),
2122        }
2123    }
2124
2125    #[test]
2126    fn test_parser_no_pax_means_none() {
2127        // Entries without PAX extensions should have pax == None
2128        let mut archive = Vec::new();
2129        archive.extend_from_slice(&make_header(b"plain.txt", 0, b'0'));
2130        archive.extend(zeroes(1024));
2131
2132        let mut parser = Parser::new(Limits::default());
2133        let event = parser.parse(&archive).unwrap();
2134
2135        match event {
2136            ParseEvent::Entry { entry, .. } => {
2137                assert!(entry.pax.is_none());
2138            }
2139            other => panic!("Expected Entry, got {:?}", other),
2140        }
2141    }
2142
2143    #[test]
2144    fn test_parser_gnu_long_name_no_pax() {
2145        // GNU long name without PAX should still have pax == None
2146        let long_name = "long/path/".to_string() + &"x".repeat(100);
2147        let mut archive = Vec::new();
2148        archive.extend(make_gnu_long_name(long_name.as_bytes()));
2149        archive.extend_from_slice(&make_header(b"short", 0, b'0'));
2150        archive.extend(zeroes(1024));
2151
2152        let mut parser = Parser::new(Limits::default());
2153        let event = parser.parse(&archive).unwrap();
2154
2155        match event {
2156            ParseEvent::Entry { entry, .. } => {
2157                assert_eq!(entry.path.as_ref(), long_name.as_bytes());
2158                assert!(entry.pax.is_none());
2159            }
2160            other => panic!("Expected Entry, got {:?}", other),
2161        }
2162    }
2163
2164    #[test]
2165    fn test_parser_pax_linkpath() {
2166        // PAX linkpath for symlink targets
2167        let pax_linkpath = "/a/very/long/symlink/target/from/pax";
2168
2169        let mut archive = Vec::new();
2170        archive.extend(make_pax_header(&[("linkpath", pax_linkpath.as_bytes())]));
2171        archive.extend_from_slice(&make_link_header(b"mylink", b"short", b'2'));
2172        archive.extend(zeroes(1024));
2173
2174        let mut parser = Parser::new(Limits::default());
2175        let event = parser.parse(&archive).unwrap();
2176
2177        match event {
2178            ParseEvent::Entry { entry, .. } => {
2179                assert!(entry.is_symlink());
2180                assert_eq!(
2181                    entry.link_target.as_ref().unwrap().as_ref(),
2182                    pax_linkpath.as_bytes()
2183                );
2184            }
2185            other => panic!("Expected Entry, got {:?}", other),
2186        }
2187    }
2188
2189    // =========================================================================
2190    // PAX global header tests
2191    // =========================================================================
2192
2193    #[test]
2194    fn test_parser_global_pax_header() {
2195        // A global PAX header should be surfaced as a GlobalExtensions event,
2196        // not silently skipped.
2197        let mut archive = Vec::new();
2198        archive.extend(make_pax_global_header(&[
2199            ("mtime", b"1700000000"),
2200            (
2201                "SCHILY.xattr.security.selinux",
2202                b"system_u:object_r:default_t:s0",
2203            ),
2204        ]));
2205        // Followed by a regular file entry
2206        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2207        archive.extend(zeroes(1024));
2208
2209        let mut parser = Parser::new(Limits::default());
2210
2211        // First event: GlobalExtensions
2212        let event = parser.parse(&archive).unwrap();
2213        let consumed = match &event {
2214            ParseEvent::GlobalExtensions { consumed, pax_data } => {
2215                // Verify the raw PAX data can be parsed
2216                let exts = PaxExtensions::new(pax_data);
2217                let keys: Vec<&str> = exts
2218                    .filter_map(|e| e.ok())
2219                    .filter_map(|e| e.key().ok())
2220                    .collect();
2221                assert_eq!(keys, &["mtime", "SCHILY.xattr.security.selinux"]);
2222                *consumed
2223            }
2224            other => panic!("Expected GlobalExtensions, got {:?}", other),
2225        };
2226
2227        // Second event: the actual file entry (global headers don't affect it)
2228        let event = parser.parse(&archive[consumed..]).unwrap();
2229        match event {
2230            ParseEvent::Entry { entry, .. } => {
2231                assert_eq!(entry.path_lossy(), "file.txt");
2232                // Global header should NOT have modified this entry's metadata
2233                assert!(entry.pax.is_none());
2234            }
2235            other => panic!("Expected Entry, got {:?}", other),
2236        }
2237    }
2238
2239    #[test]
2240    fn test_parser_global_pax_header_need_data() {
2241        // Global PAX header present but content not yet available
2242        let header = make_header(b"pax_global_header", 100, b'g');
2243
2244        let mut parser = Parser::new(Limits::default());
2245        let event = parser.parse(&header).unwrap();
2246
2247        match event {
2248            ParseEvent::NeedData { min_bytes } => {
2249                assert_eq!(min_bytes, 1024); // header (512) + padded content (512)
2250            }
2251            other => panic!("Expected NeedData, got {:?}", other),
2252        }
2253    }
2254
2255    #[test]
2256    fn test_parser_global_pax_header_too_large() {
2257        // Global PAX header exceeding max_metadata_size should error
2258        let large_value = "x".repeat(1000);
2259
2260        let mut archive = Vec::new();
2261        archive.extend(make_pax_global_header(&[(
2262            "comment",
2263            large_value.as_bytes(),
2264        )]));
2265        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2266        archive.extend(zeroes(1024));
2267
2268        let limits = Limits {
2269            max_metadata_size: 100,
2270            ..Default::default()
2271        };
2272        let mut parser = Parser::new(limits);
2273        let result = parser.parse(&archive);
2274
2275        assert!(matches!(result, Err(ParseError::MetadataTooLarge { .. })));
2276    }
2277
2278    #[test]
2279    fn test_parser_multiple_global_pax_headers() {
2280        // Multiple global PAX headers in a row should each produce a
2281        // separate GlobalExtensions event (they don't use the pending
2282        // metadata mechanism).
2283        let mut archive = Vec::new();
2284        archive.extend(make_pax_global_header(&[("comment", b"first")]));
2285        archive.extend(make_pax_global_header(&[("comment", b"second")]));
2286        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2287        archive.extend(zeroes(1024));
2288
2289        let mut parser = Parser::new(Limits::default());
2290
2291        // First global header
2292        let event = parser.parse(&archive).unwrap();
2293        let consumed1 = match &event {
2294            ParseEvent::GlobalExtensions { consumed, pax_data } => {
2295                let exts: Vec<_> = PaxExtensions::new(pax_data)
2296                    .filter_map(|e| e.ok())
2297                    .collect();
2298                assert_eq!(exts[0].value_bytes(), b"first");
2299                *consumed
2300            }
2301            other => panic!("Expected GlobalExtensions, got {:?}", other),
2302        };
2303
2304        // Second global header
2305        let event = parser.parse(&archive[consumed1..]).unwrap();
2306        let consumed2 = match &event {
2307            ParseEvent::GlobalExtensions { consumed, pax_data } => {
2308                let exts: Vec<_> = PaxExtensions::new(pax_data)
2309                    .filter_map(|e| e.ok())
2310                    .collect();
2311                assert_eq!(exts[0].value_bytes(), b"second");
2312                *consumed
2313            }
2314            other => panic!("Expected GlobalExtensions, got {:?}", other),
2315        };
2316
2317        // Then the actual file entry
2318        let event = parser.parse(&archive[consumed1 + consumed2..]).unwrap();
2319        assert!(matches!(event, ParseEvent::Entry { .. }));
2320    }
2321
2322    #[test]
2323    fn test_parser_global_pax_does_not_interfere_with_local_pax() {
2324        // A global PAX header followed by a local PAX header should produce
2325        // both events independently.
2326        let mut archive = Vec::new();
2327        archive.extend(make_pax_global_header(&[("mtime", b"1000000000")]));
2328        archive.extend(make_pax_header(&[("path", b"overridden.txt")]));
2329        archive.extend_from_slice(&make_header(b"original.txt", 0, b'0'));
2330        archive.extend(zeroes(1024));
2331
2332        let mut parser = Parser::new(Limits::default());
2333
2334        // First: global extensions event
2335        let event = parser.parse(&archive).unwrap();
2336        let consumed = match &event {
2337            ParseEvent::GlobalExtensions { consumed, .. } => *consumed,
2338            other => panic!("Expected GlobalExtensions, got {:?}", other),
2339        };
2340
2341        // Second: entry with local PAX applied
2342        let event = parser.parse(&archive[consumed..]).unwrap();
2343        match event {
2344            ParseEvent::Entry { entry, .. } => {
2345                // Local PAX should have been applied
2346                assert_eq!(entry.path.as_ref(), b"overridden.txt");
2347                assert!(entry.pax.is_some());
2348            }
2349            other => panic!("Expected Entry, got {:?}", other),
2350        }
2351    }
2352
2353    // =========================================================================
2354    // Error case tests
2355    // =========================================================================
2356
2357    #[test]
2358    fn test_parser_orphaned_metadata() {
2359        // GNU long name entry followed by end of archive (no actual entry)
2360        let mut archive = Vec::new();
2361        archive.extend(make_gnu_long_name(b"some/long/name/here"));
2362        archive.extend(zeroes(1024));
2363
2364        let mut parser = Parser::new(Limits::default());
2365        let result = parser.parse(&archive);
2366
2367        assert!(matches!(result, Err(ParseError::OrphanedMetadata)));
2368    }
2369
2370    #[test]
2371    fn test_parser_orphaned_pax_metadata() {
2372        // PAX header followed by end of archive
2373        let mut archive = Vec::new();
2374        archive.extend(make_pax_header(&[("path", b"test")]));
2375        archive.extend(zeroes(1024));
2376
2377        let mut parser = Parser::new(Limits::default());
2378        let result = parser.parse(&archive);
2379
2380        assert!(matches!(result, Err(ParseError::OrphanedMetadata)));
2381    }
2382
2383    #[test]
2384    fn test_parser_duplicate_gnu_long_name() {
2385        // Two GNU long name entries in a row should error
2386        let mut archive = Vec::new();
2387        archive.extend(make_gnu_long_name(b"first/long/name"));
2388        archive.extend(make_gnu_long_name(b"second/long/name"));
2389        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2390        archive.extend(zeroes(1024));
2391
2392        let mut parser = Parser::new(Limits::default());
2393        let result = parser.parse(&archive);
2394
2395        assert!(matches!(result, Err(ParseError::DuplicateGnuLongName)));
2396    }
2397
2398    #[test]
2399    fn test_parser_duplicate_gnu_long_link() {
2400        // Two GNU long link entries in a row should error
2401        let mut archive = Vec::new();
2402        archive.extend(make_gnu_long_link(b"first/long/target"));
2403        archive.extend(make_gnu_long_link(b"second/long/target"));
2404        archive.extend_from_slice(&make_link_header(b"link", b"x", b'2'));
2405        archive.extend(zeroes(1024));
2406
2407        let mut parser = Parser::new(Limits::default());
2408        let result = parser.parse(&archive);
2409
2410        assert!(matches!(result, Err(ParseError::DuplicateGnuLongLink)));
2411    }
2412
2413    #[test]
2414    fn test_parser_duplicate_pax_header() {
2415        // Two PAX headers in a row should error
2416        let mut archive = Vec::new();
2417        archive.extend(make_pax_header(&[("path", b"first")]));
2418        archive.extend(make_pax_header(&[("path", b"second")]));
2419        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2420        archive.extend(zeroes(1024));
2421
2422        let mut parser = Parser::new(Limits::default());
2423        let result = parser.parse(&archive);
2424
2425        assert!(matches!(result, Err(ParseError::DuplicatePaxHeader)));
2426    }
2427
2428    // =========================================================================
2429    // Combined GNU and PAX tests
2430    // =========================================================================
2431
2432    #[test]
2433    fn test_parser_combined_gnu_pax() {
2434        // Both GNU long name and PAX path - PAX should win
2435        let gnu_name = "gnu/long/name/".to_string() + &"g".repeat(100);
2436        let pax_path = "pax/should/win/file.txt";
2437
2438        let mut archive = Vec::new();
2439        archive.extend(make_gnu_long_name(gnu_name.as_bytes()));
2440        archive.extend(make_pax_header(&[("path", pax_path.as_bytes())]));
2441        archive.extend_from_slice(&make_header(b"header.txt", 0, b'0'));
2442        archive.extend(zeroes(1024));
2443
2444        let mut parser = Parser::new(Limits::default());
2445        let event = parser.parse(&archive).unwrap();
2446
2447        match event {
2448            ParseEvent::Entry { entry, .. } => {
2449                // PAX path should override GNU long name
2450                assert_eq!(entry.path.as_ref(), pax_path.as_bytes());
2451            }
2452            other => panic!("Expected Entry, got {:?}", other),
2453        }
2454    }
2455
2456    #[test]
2457    fn test_parser_gnu_long_name_and_link_combined() {
2458        // Both GNU long name and long link for the same entry
2459        let long_name = "long/symlink/name/".to_string() + &"n".repeat(100);
2460        let long_target = "long/target/path/".to_string() + &"t".repeat(100);
2461
2462        let mut archive = Vec::new();
2463        archive.extend(make_gnu_long_name(long_name.as_bytes()));
2464        archive.extend(make_gnu_long_link(long_target.as_bytes()));
2465        archive.extend_from_slice(&make_link_header(b"short", b"short", b'2'));
2466        archive.extend(zeroes(1024));
2467
2468        let mut parser = Parser::new(Limits::default());
2469        let event = parser.parse(&archive).unwrap();
2470
2471        match event {
2472            ParseEvent::Entry { entry, .. } => {
2473                assert_eq!(entry.path.as_ref(), long_name.as_bytes());
2474                assert_eq!(
2475                    entry.link_target.as_ref().unwrap().as_ref(),
2476                    long_target.as_bytes()
2477                );
2478                assert!(entry.is_symlink());
2479            }
2480            other => panic!("Expected Entry, got {:?}", other),
2481        }
2482    }
2483
2484    #[test]
2485    fn test_parser_pax_multiple_entries() {
2486        // Multiple PAX entries for different files
2487        let mut archive = Vec::new();
2488
2489        // First file with PAX
2490        archive.extend(make_pax_header(&[("path", b"first/file.txt")]));
2491        archive.extend_from_slice(&make_header(b"f1", 5, b'0'));
2492        let mut content1 = [0u8; 512];
2493        content1[0..5].copy_from_slice(b"hello");
2494        archive.extend_from_slice(&content1);
2495
2496        // Second file with PAX
2497        archive.extend(make_pax_header(&[("path", b"second/file.txt")]));
2498        archive.extend_from_slice(&make_header(b"f2", 5, b'0'));
2499        let mut content2 = [0u8; 512];
2500        content2[0..5].copy_from_slice(b"world");
2501        archive.extend_from_slice(&content2);
2502
2503        archive.extend(zeroes(1024));
2504
2505        let mut parser = Parser::new(Limits::default());
2506
2507        // Parse first entry
2508        let event1 = parser.parse(&archive).unwrap();
2509        let consumed1 = match &event1 {
2510            ParseEvent::Entry { consumed, entry } => {
2511                assert_eq!(entry.path.as_ref(), b"first/file.txt");
2512                assert_eq!(entry.size, 5);
2513                *consumed
2514            }
2515            other => panic!("Expected Entry, got {:?}", other),
2516        };
2517
2518        // Parse second entry (skip past first entry's content + padding)
2519        let offset = consumed1 + 512;
2520        let event2 = parser.parse(&archive[offset..]).unwrap();
2521        let consumed2 = match &event2 {
2522            ParseEvent::Entry { consumed, entry } => {
2523                assert_eq!(entry.path.as_ref(), b"second/file.txt");
2524                assert_eq!(entry.size, 5);
2525                *consumed
2526            }
2527            other => panic!("Expected Entry, got {:?}", other),
2528        };
2529
2530        // Parse end (skip past second entry's content + padding)
2531        let final_offset = offset + consumed2 + 512;
2532        let event3 = parser.parse(&archive[final_offset..]).unwrap();
2533        assert!(matches!(event3, ParseEvent::End { .. }));
2534    }
2535
2536    #[test]
2537    fn test_parser_pax_uname_gname() {
2538        // PAX uname and gname override
2539        let mut archive = Vec::new();
2540        archive.extend(make_pax_header(&[
2541            ("uname", b"testuser"),
2542            ("gname", b"testgroup"),
2543        ]));
2544        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2545        archive.extend(zeroes(1024));
2546
2547        let mut parser = Parser::new(Limits::default());
2548        let event = parser.parse(&archive).unwrap();
2549
2550        match event {
2551            ParseEvent::Entry { entry, .. } => {
2552                assert_eq!(entry.uname.as_ref().unwrap().as_ref(), b"testuser");
2553                assert_eq!(entry.gname.as_ref().unwrap().as_ref(), b"testgroup");
2554            }
2555            other => panic!("Expected Entry, got {:?}", other),
2556        }
2557    }
2558
2559    // =========================================================================
2560    // Size limit tests
2561    // =========================================================================
2562
2563    #[test]
2564    fn test_parser_gnu_long_too_large() {
2565        let long_name = "x".repeat(200);
2566
2567        let mut archive = Vec::new();
2568        archive.extend(make_gnu_long_name(long_name.as_bytes()));
2569        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2570        archive.extend(zeroes(1024));
2571
2572        let limits = Limits {
2573            max_metadata_size: 100,
2574            ..Default::default()
2575        };
2576        let mut parser = Parser::new(limits);
2577        let result = parser.parse(&archive);
2578
2579        assert!(matches!(result, Err(ParseError::MetadataTooLarge { .. })));
2580    }
2581
2582    #[test]
2583    fn test_parser_pax_path_too_long() {
2584        let long_path = "x".repeat(200);
2585
2586        let mut archive = Vec::new();
2587        archive.extend(make_pax_header(&[("path", long_path.as_bytes())]));
2588        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2589        archive.extend(zeroes(1024));
2590
2591        let limits = Limits {
2592            max_path_len: Some(100),
2593            ..Default::default()
2594        };
2595        let mut parser = Parser::new(limits);
2596        let result = parser.parse(&archive);
2597
2598        assert!(matches!(
2599            result,
2600            Err(ParseError::PathTooLong {
2601                len: 200,
2602                limit: 100
2603            })
2604        ));
2605    }
2606
2607    #[test]
2608    fn test_parser_pax_too_large() {
2609        // Create a PAX header that exceeds the metadata size limit
2610        let large_value = "x".repeat(1000);
2611
2612        let mut archive = Vec::new();
2613        archive.extend(make_pax_header(&[("path", large_value.as_bytes())]));
2614        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2615        archive.extend(zeroes(1024));
2616
2617        let limits = Limits {
2618            max_metadata_size: 100,
2619            ..Default::default()
2620        };
2621        let mut parser = Parser::new(limits);
2622        let result = parser.parse(&archive);
2623
2624        assert!(matches!(result, Err(ParseError::MetadataTooLarge { .. })));
2625    }
2626
2627    // =========================================================================
2628    // Need data tests for extension entries
2629    // =========================================================================
2630
2631    #[test]
2632    fn test_parser_need_data_for_gnu_long_content() {
2633        // Create a GNU long name header, but don't provide the content
2634        let header = make_header(b"././@LongLink", 200, b'L');
2635
2636        let mut parser = Parser::new(Limits::default());
2637        let event = parser.parse(&header).unwrap();
2638
2639        // Need header (512) + padded content (512) = 1024
2640        match event {
2641            ParseEvent::NeedData { min_bytes } => {
2642                assert_eq!(min_bytes, 1024);
2643            }
2644            other => panic!("Expected NeedData, got {:?}", other),
2645        }
2646    }
2647
2648    #[test]
2649    fn test_parser_need_data_for_pax_content() {
2650        // Create a PAX header, but don't provide the content
2651        let header = make_header(b"PaxHeader/file", 100, b'x');
2652
2653        let mut parser = Parser::new(Limits::default());
2654        let event = parser.parse(&header).unwrap();
2655
2656        // Need header (512) + padded content (512) = 1024
2657        match event {
2658            ParseEvent::NeedData { min_bytes } => {
2659                assert_eq!(min_bytes, 1024);
2660            }
2661            other => panic!("Expected NeedData, got {:?}", other),
2662        }
2663    }
2664
2665    #[test]
2666    fn test_need_data_adjusted_through_extension_headers() {
2667        // Regression test: NeedData.min_bytes must be relative to the
2668        // original buffer, not the recursive sub-slice.
2669        //
2670        // Provide a complete GNU long name entry (header + content = 1024 bytes)
2671        // but no following header. The recursive parse_header call on the
2672        // sub-slice needs 512 more bytes for the next header. After
2673        // add_consumed(1024), min_bytes must be 1024 + 512 = 1536.
2674        let long_name = "long/path/name/".to_string() + &"x".repeat(90);
2675        let gnu_entry = make_gnu_long_name(long_name.as_bytes());
2676        // gnu_entry is header(512) + padded_content(512) = 1024 bytes
2677        assert_eq!(gnu_entry.len(), 1024);
2678
2679        let mut parser = Parser::new(Limits::default());
2680        let event = parser.parse(&gnu_entry).unwrap();
2681
2682        match event {
2683            ParseEvent::NeedData { min_bytes } => {
2684                // The recursive call needs 512 bytes (one header) from its
2685                // sub-slice. add_consumed(1024) must adjust this to 1536.
2686                assert_eq!(
2687                    min_bytes, 1536,
2688                    "NeedData.min_bytes must account for bytes consumed by \
2689                     extension headers (1024 + 512 = 1536)"
2690                );
2691            }
2692            other => panic!("Expected NeedData, got {:?}", other),
2693        }
2694    }
2695
2696    /// Test for CVE-2025-62518 (TARmageddon): PAX size must override header size
2697    ///
2698    /// The vulnerability occurs when:
2699    /// 1. PAX header specifies size=X (e.g., 1024)
2700    /// 2. ustar header specifies size=0
2701    /// 3. Vulnerable parser uses header size (0) instead of PAX size (1024)
2702    /// 4. Parser advances 0 bytes, treating nested tar content as outer entries
2703    ///
2704    /// tar-core MUST use PAX size for content advancement to be secure.
2705    #[test]
2706    fn test_cve_2025_62518_pax_size_overrides_header() {
2707        // PAX header with size=1024
2708        let pax_entries: &[(&str, &[u8])] = &[("size", b"1024")];
2709        let pax_data = make_pax_header(pax_entries);
2710
2711        // Actual file header with size=0 in ustar (the attack vector!)
2712        // A vulnerable parser would skip 0 bytes and see the "content" as headers
2713        let file_header = make_header(b"nested.tar", 0, b'0'); // size=0 in header!
2714
2715        // The "content" - in an attack this would be a nested tar archive
2716        // with malicious files that get extracted
2717        let mut content = vec![0u8; 1024];
2718        // Put something that looks like a tar header to detect if parser is confused
2719        content[0..9].copy_from_slice(b"MALICIOUS");
2720        content[156] = b'0'; // Would be parsed as regular file if vulnerable
2721
2722        // Build full archive: PAX header + actual header + content + padding + end markers
2723        let mut archive = Vec::new();
2724        archive.extend_from_slice(&pax_data);
2725        archive.extend_from_slice(&file_header);
2726        archive.extend_from_slice(&content);
2727        // Pad to 512 boundary (1024 is already aligned)
2728        archive.extend(zeroes(1024));
2729
2730        // Parse
2731        let mut parser = Parser::new(Limits::default());
2732        let event = parser.parse(&archive).unwrap();
2733
2734        let consumed = match &event {
2735            ParseEvent::Entry { consumed, entry } => {
2736                // CRITICAL: entry.size MUST be 1024 (from PAX), not 0 (from header)
2737                assert_eq!(
2738                    entry.size, 1024,
2739                    "CVE-2025-62518: Parser MUST use PAX size (1024), not header size (0)"
2740                );
2741
2742                // padded_size should also be 1024
2743                assert_eq!(entry.padded_size(), 1024, "Padded size must match PAX size");
2744
2745                // Path should be from header
2746                assert_eq!(entry.path_lossy(), "nested.tar");
2747
2748                *consumed
2749            }
2750            other => panic!("Expected Entry, got {:?}", other),
2751        };
2752
2753        // Continue parsing - should get End, NOT another entry
2754        let remaining = &archive[consumed + 1024..]; // consumed headers + 1024 bytes content
2755        let event = parser.parse(remaining).unwrap();
2756
2757        match event {
2758            ParseEvent::End { .. } => {
2759                // Correct! Parser properly skipped the 1024-byte content
2760            }
2761            ParseEvent::Entry { entry, .. } => {
2762                panic!(
2763                    "CVE-2025-62518 VULNERABLE: Parser found unexpected entry '{}' \
2764                     because it used header size (0) instead of PAX size (1024)",
2765                    entry.path_lossy()
2766                );
2767            }
2768            other => panic!("Expected End, got {:?}", other),
2769        }
2770    }
2771
2772    /// Additional test: ensure parser state tracks PAX-overridden size
2773    #[test]
2774    fn test_pax_size_affects_parser_state() {
2775        // PAX specifies 512 bytes, header says 0
2776        let pax_entries: &[(&str, &[u8])] = &[("size", b"512")];
2777        let pax_data = make_pax_header(pax_entries);
2778        let file_header = make_header(b"test.bin", 0, b'0');
2779
2780        let content = vec![0u8; 512];
2781        let mut archive = Vec::new();
2782        archive.extend_from_slice(&pax_data);
2783        archive.extend_from_slice(&file_header);
2784        archive.extend_from_slice(&content);
2785        archive.extend(zeroes(1024));
2786
2787        let mut parser = Parser::new(Limits::default());
2788
2789        // Parse entry
2790        let event = parser.parse(&archive).unwrap();
2791        let size = match event {
2792            ParseEvent::Entry { entry, .. } => entry.size,
2793            other => panic!("Expected Entry, got {:?}", other),
2794        };
2795
2796        assert_eq!(size, 512, "Entry size must reflect PAX override");
2797    }
2798
2799    // =========================================================================
2800    // Strict mode tests
2801    // =========================================================================
2802
2803    /// Build a minimal archive with a PAX header overriding `key` to `value`,
2804    /// followed by a regular file entry and end-of-archive.
2805    fn make_archive_with_pax(key: &str, value: &[u8]) -> Vec<u8> {
2806        let mut archive = Vec::new();
2807        archive.extend(make_pax_header(&[(key, value)]));
2808        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2809        archive.extend(zeroes(1024));
2810        archive
2811    }
2812
2813    #[test]
2814    fn test_strict_rejects_invalid_pax_uid() {
2815        let archive = make_archive_with_pax("uid", b"notanumber");
2816        let mut parser = Parser::new(Limits::default());
2817        let err = parser.parse(&archive).unwrap_err();
2818        assert!(
2819            matches!(err, ParseError::InvalidPaxValue { key: "uid", .. }),
2820            "expected InvalidPaxValue for uid, got {err:?}"
2821        );
2822    }
2823
2824    #[test]
2825    fn test_strict_rejects_invalid_pax_size() {
2826        let archive = make_archive_with_pax("size", b"xyz");
2827        let mut parser = Parser::new(Limits::default());
2828        let err = parser.parse(&archive).unwrap_err();
2829        assert!(matches!(
2830            err,
2831            ParseError::InvalidPaxValue { key: "size", .. }
2832        ));
2833    }
2834
2835    #[test]
2836    fn test_strict_rejects_invalid_pax_gid() {
2837        let archive = make_archive_with_pax("gid", b"bad");
2838        let mut parser = Parser::new(Limits::default());
2839        let err = parser.parse(&archive).unwrap_err();
2840        assert!(matches!(
2841            err,
2842            ParseError::InvalidPaxValue { key: "gid", .. }
2843        ));
2844    }
2845
2846    #[test]
2847    fn test_strict_rejects_invalid_pax_mtime() {
2848        let archive = make_archive_with_pax("mtime", b"nottime");
2849        let mut parser = Parser::new(Limits::default());
2850        let err = parser.parse(&archive).unwrap_err();
2851        assert!(matches!(
2852            err,
2853            ParseError::InvalidPaxValue { key: PAX_MTIME, .. }
2854        ));
2855    }
2856
2857    #[test]
2858    fn test_lenient_ignores_invalid_pax_uid() {
2859        let archive = make_archive_with_pax("uid", b"notanumber");
2860        let mut parser = Parser::new(Limits::default());
2861        parser.set_ignore_pax_errors(true);
2862        let event = parser.parse(&archive).unwrap();
2863        match event {
2864            ParseEvent::Entry { entry, .. } => {
2865                // Should fall back to header uid (1000 from make_header)
2866                assert_eq!(entry.uid, 1000);
2867            }
2868            other => panic!("Expected Entry, got {other:?}"),
2869        }
2870    }
2871
2872    #[test]
2873    fn test_lenient_ignores_invalid_pax_size() {
2874        let archive = make_archive_with_pax("size", b"xyz");
2875        let mut parser = Parser::new(Limits::default());
2876        parser.set_ignore_pax_errors(true);
2877        let event = parser.parse(&archive).unwrap();
2878        match event {
2879            ParseEvent::Entry { entry, .. } => {
2880                // Should fall back to header size (0 from make_header)
2881                assert_eq!(entry.size, 0);
2882            }
2883            other => panic!("Expected Entry, got {other:?}"),
2884        }
2885    }
2886
2887    #[test]
2888    fn test_strict_accepts_valid_pax_values() {
2889        let mut archive = Vec::new();
2890        archive.extend(make_pax_header(&[
2891            ("uid", b"2000"),
2892            ("gid", b"3000"),
2893            ("size", b"42"),
2894            ("mtime", b"1700000000"),
2895        ]));
2896        archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2897        archive.extend(zeroes(1024));
2898
2899        let mut parser = Parser::new(Limits::default());
2900        let event = parser.parse(&archive).unwrap();
2901        match event {
2902            ParseEvent::Entry { entry, .. } => {
2903                assert_eq!(entry.uid, 2000);
2904                assert_eq!(entry.gid, 3000);
2905                assert_eq!(entry.size, 42);
2906                assert_eq!(entry.mtime, 1700000000);
2907            }
2908            other => panic!("Expected Entry, got {other:?}"),
2909        }
2910    }
2911
2912    #[test]
2913    fn test_strict_accepts_fractional_mtime() {
2914        let archive = make_archive_with_pax("mtime", b"1234567890.123456");
2915        let mut parser = Parser::new(Limits::default());
2916        let event = parser.parse(&archive).unwrap();
2917        match event {
2918            ParseEvent::Entry { entry, .. } => {
2919                assert_eq!(entry.mtime, 1234567890);
2920            }
2921            other => panic!("Expected Entry, got {other:?}"),
2922        }
2923    }
2924
2925    // =========================================================================
2926    // Sparse entry helpers
2927    // =========================================================================
2928
2929    /// Encode a u64 as an 12-byte octal field (for sparse descriptor fields).
2930    fn encode_octal_12(value: u64) -> [u8; 12] {
2931        let s = format!("{value:011o}\0");
2932        let mut field = [0u8; 12];
2933        field.copy_from_slice(s.as_bytes());
2934        field
2935    }
2936
2937    /// Create a GNU sparse header (type 'S') with inline sparse descriptors.
2938    ///
2939    /// `entries` are (offset, length) pairs for the sparse map (max 4).
2940    /// `on_disk_size` is the header's size field (total data bytes on disk).
2941    /// `real_size` is the logical file size.
2942    /// If `is_extended` is true, the isextended flag is set.
2943    fn make_gnu_sparse_header(
2944        name: &[u8],
2945        entries: &[(u64, u64)],
2946        on_disk_size: u64,
2947        real_size: u64,
2948        is_extended: bool,
2949    ) -> [u8; HEADER_SIZE] {
2950        assert!(entries.len() <= 4, "max 4 inline sparse descriptors");
2951
2952        let mut header = [0u8; HEADER_SIZE];
2953
2954        // name (0..100)
2955        let name_len = name.len().min(100);
2956        header[0..name_len].copy_from_slice(&name[..name_len]);
2957
2958        // mode (100..108)
2959        header[100..107].copy_from_slice(b"0000644");
2960        // uid (108..116)
2961        header[108..115].copy_from_slice(b"0001750");
2962        // gid (116..124)
2963        header[116..123].copy_from_slice(b"0001750");
2964
2965        // size (124..136): on-disk data size
2966        let size_str = format!("{on_disk_size:011o}");
2967        header[124..135].copy_from_slice(size_str.as_bytes());
2968
2969        // mtime (136..148)
2970        header[136..147].copy_from_slice(b"14712345670");
2971
2972        // typeflag (156): 'S' for sparse
2973        header[156] = b'S';
2974
2975        // magic (257..263): GNU
2976        header[257..263].copy_from_slice(GNU_MAGIC);
2977        // version (263..265): GNU
2978        header[263..265].copy_from_slice(GNU_VERSION);
2979
2980        // sparse descriptors at offset 386, each 24 bytes
2981        for (i, &(offset, length)) in entries.iter().enumerate() {
2982            let base = 386 + i * 24;
2983            header[base..base + 12].copy_from_slice(&encode_octal_12(offset));
2984            header[base + 12..base + 24].copy_from_slice(&encode_octal_12(length));
2985        }
2986
2987        // isextended at offset 482
2988        header[482] = if is_extended { 1 } else { 0 };
2989
2990        // realsize at offset 483
2991        let real_str = format!("{real_size:011o}");
2992        header[483..494].copy_from_slice(real_str.as_bytes());
2993
2994        // Compute and set checksum
2995        let hdr = Header::from_bytes(&header);
2996        let checksum = hdr.compute_checksum();
2997        let checksum_str = format!("{checksum:06o}\0 ");
2998        header[148..156].copy_from_slice(checksum_str.as_bytes());
2999
3000        header
3001    }
3002
3003    /// Create a GNU extended sparse block (512 bytes) with up to 21
3004    /// descriptors. Returns a 512-byte block.
3005    fn make_gnu_ext_sparse(entries: &[(u64, u64)], is_extended: bool) -> [u8; HEADER_SIZE] {
3006        assert!(entries.len() <= 21, "max 21 descriptors per ext block");
3007
3008        let mut block = [0u8; HEADER_SIZE];
3009
3010        for (i, &(offset, length)) in entries.iter().enumerate() {
3011            let base = i * 24;
3012            block[base..base + 12].copy_from_slice(&encode_octal_12(offset));
3013            block[base + 12..base + 24].copy_from_slice(&encode_octal_12(length));
3014        }
3015
3016        // isextended at offset 504 (byte after 21 * 24 = 504)
3017        block[504] = if is_extended { 1 } else { 0 };
3018
3019        block
3020    }
3021
3022    // =========================================================================
3023    // Sparse entry tests
3024    // =========================================================================
3025
3026    #[test]
3027    fn test_sparse_basic() {
3028        // Sparse file with 2 data regions: [0x1000..0x1005) and [0x3000..0x3005)
3029        // On-disk size: 10 bytes (5 + 5), real size: 0x3005
3030        let header = make_gnu_sparse_header(
3031            b"sparse.txt",
3032            &[(0x1000, 5), (0x3000, 5)],
3033            10,     // on-disk
3034            0x3005, // real size
3035            false,
3036        );
3037
3038        let mut archive = Vec::new();
3039        archive.extend_from_slice(&header);
3040        // Content (10 bytes, padded to 512)
3041        let mut content = [0u8; HEADER_SIZE];
3042        content[0..5].copy_from_slice(b"hello");
3043        content[5..10].copy_from_slice(b"world");
3044        archive.extend_from_slice(&content);
3045        archive.extend(zeroes(1024)); // end of archive
3046
3047        let mut parser = Parser::new(Limits::default());
3048        let event = parser.parse(&archive).unwrap();
3049
3050        match event {
3051            ParseEvent::SparseEntry {
3052                consumed,
3053                entry,
3054                sparse_map,
3055                real_size,
3056            } => {
3057                assert_eq!(consumed, HEADER_SIZE);
3058                assert_eq!(entry.path_lossy(), "sparse.txt");
3059                assert_eq!(entry.size, 10);
3060                assert_eq!(real_size, 0x3005);
3061                assert_eq!(sparse_map.len(), 2);
3062                assert_eq!(
3063                    sparse_map[0],
3064                    SparseEntry {
3065                        offset: 0x1000,
3066                        length: 5
3067                    }
3068                );
3069                assert_eq!(
3070                    sparse_map[1],
3071                    SparseEntry {
3072                        offset: 0x3000,
3073                        length: 5
3074                    }
3075                );
3076            }
3077            other => panic!("Expected SparseEntry, got {other:?}"),
3078        }
3079    }
3080
3081    #[test]
3082    fn test_sparse_no_entries() {
3083        // Sparse file with no data regions (all zeros), real size 4096
3084        let header = make_gnu_sparse_header(b"empty_sparse.txt", &[], 0, 4096, false);
3085
3086        let mut archive = Vec::new();
3087        archive.extend_from_slice(&header);
3088        archive.extend(zeroes(1024));
3089
3090        let mut parser = Parser::new(Limits::default());
3091        let event = parser.parse(&archive).unwrap();
3092
3093        match event {
3094            ParseEvent::SparseEntry {
3095                sparse_map,
3096                real_size,
3097                entry,
3098                ..
3099            } => {
3100                assert!(sparse_map.is_empty());
3101                assert_eq!(real_size, 4096);
3102                assert_eq!(entry.size, 0);
3103            }
3104            other => panic!("Expected SparseEntry, got {other:?}"),
3105        }
3106    }
3107
3108    #[test]
3109    fn test_sparse_four_inline_entries() {
3110        // Max inline: 4 sparse descriptors
3111        let entries = [(0u64, 512), (1024, 512), (2048, 512), (3072, 512)];
3112        let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3113        let real_size = 3072 + 512;
3114        let header = make_gnu_sparse_header(b"four.txt", &entries, on_disk, real_size, false);
3115
3116        let mut archive = Vec::new();
3117        archive.extend_from_slice(&header);
3118        archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3119        archive.extend(zeroes(1024));
3120
3121        let mut parser = Parser::new(Limits::default());
3122        let event = parser.parse(&archive).unwrap();
3123
3124        match event {
3125            ParseEvent::SparseEntry {
3126                sparse_map,
3127                real_size: rs,
3128                ..
3129            } => {
3130                assert_eq!(sparse_map.len(), 4);
3131                assert_eq!(rs, real_size);
3132                for (i, &(off, len)) in entries.iter().enumerate() {
3133                    assert_eq!(sparse_map[i].offset, off);
3134                    assert_eq!(sparse_map[i].length, len);
3135                }
3136            }
3137            other => panic!("Expected SparseEntry, got {other:?}"),
3138        }
3139    }
3140
3141    #[test]
3142    fn test_sparse_with_extension_block() {
3143        // 4 inline + 2 in extension block = 6 total
3144        let inline_entries = [(0u64, 100), (512, 100), (1024, 100), (1536, 100)];
3145        let ext_entries = [(2048u64, 100), (2560, 100)];
3146        let on_disk: u64 = 600; // 6 * 100
3147        let real_size = 2660; // 2560 + 100
3148
3149        let header =
3150            make_gnu_sparse_header(b"extended.txt", &inline_entries, on_disk, real_size, true);
3151        let ext = make_gnu_ext_sparse(&ext_entries, false);
3152
3153        let mut archive = Vec::new();
3154        archive.extend_from_slice(&header);
3155        archive.extend_from_slice(&ext);
3156        archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3157        archive.extend(zeroes(1024));
3158
3159        let mut parser = Parser::new(Limits::default());
3160        let event = parser.parse(&archive).unwrap();
3161
3162        match event {
3163            ParseEvent::SparseEntry {
3164                consumed,
3165                sparse_map,
3166                real_size: rs,
3167                ..
3168            } => {
3169                // consumed = main header + 1 extension block
3170                assert_eq!(consumed, 2 * HEADER_SIZE);
3171                assert_eq!(rs, real_size);
3172                assert_eq!(sparse_map.len(), 6);
3173                assert_eq!(sparse_map[4].offset, 2048);
3174                assert_eq!(sparse_map[5].offset, 2560);
3175            }
3176            other => panic!("Expected SparseEntry, got {other:?}"),
3177        }
3178    }
3179
3180    #[test]
3181    fn test_sparse_multiple_extension_blocks() {
3182        // 4 inline + 21 in ext1 + 3 in ext2 = 28 total
3183        let inline = [(0u64, 10), (100, 10), (200, 10), (300, 10)];
3184        let mut ext1_entries = Vec::new();
3185        for i in 0..21 {
3186            ext1_entries.push((400 + i * 100, 10u64));
3187        }
3188        let ext2_entries = [(2500u64, 10), (2600, 10), (2700, 10)];
3189        let on_disk = 28 * 10u64;
3190        let real_size = 2710;
3191
3192        let header = make_gnu_sparse_header(b"multi_ext.txt", &inline, on_disk, real_size, true);
3193        let ext1 = make_gnu_ext_sparse(&ext1_entries, true);
3194        let ext2 = make_gnu_ext_sparse(&ext2_entries, false);
3195
3196        let mut archive = Vec::new();
3197        archive.extend_from_slice(&header);
3198        archive.extend_from_slice(&ext1);
3199        archive.extend_from_slice(&ext2);
3200        archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3201        archive.extend(zeroes(1024));
3202
3203        let mut parser = Parser::new(Limits::default());
3204        let event = parser.parse(&archive).unwrap();
3205
3206        match event {
3207            ParseEvent::SparseEntry {
3208                consumed,
3209                sparse_map,
3210                real_size: rs,
3211                ..
3212            } => {
3213                assert_eq!(consumed, 3 * HEADER_SIZE);
3214                assert_eq!(rs, real_size);
3215                assert_eq!(sparse_map.len(), 28);
3216            }
3217            other => panic!("Expected SparseEntry, got {other:?}"),
3218        }
3219    }
3220
3221    #[test]
3222    fn test_sparse_need_data_for_extension() {
3223        // Header says isextended=true, but we only provide the header.
3224        // Parser should return NeedData.
3225        let header = make_gnu_sparse_header(
3226            b"need_ext.txt",
3227            &[(0, 100)],
3228            100,
3229            100,
3230            true, // extension expected
3231        );
3232
3233        let mut parser = Parser::new(Limits::default());
3234        let event = parser.parse(&header).unwrap();
3235
3236        match event {
3237            ParseEvent::NeedData { min_bytes } => {
3238                assert_eq!(min_bytes, 2 * HEADER_SIZE);
3239            }
3240            other => panic!("Expected NeedData, got {other:?}"),
3241        }
3242    }
3243
3244    #[test]
3245    fn test_sparse_need_data_chained_extensions() {
3246        // Header + ext1 (isextended=true), but ext2 not provided.
3247        let header = make_gnu_sparse_header(b"chain.txt", &[(0, 10)], 20, 20, true);
3248        let ext1 = make_gnu_ext_sparse(&[(10, 10)], true); // needs another block
3249
3250        let mut input = Vec::new();
3251        input.extend_from_slice(&header);
3252        input.extend_from_slice(&ext1);
3253
3254        let mut parser = Parser::new(Limits::default());
3255        let event = parser.parse(&input).unwrap();
3256
3257        match event {
3258            ParseEvent::NeedData { min_bytes } => {
3259                assert_eq!(min_bytes, 3 * HEADER_SIZE);
3260            }
3261            other => panic!("Expected NeedData, got {other:?}"),
3262        }
3263    }
3264
3265    #[test]
3266    fn test_sparse_not_gnu_header() {
3267        // UStar header with type 'S' — should error since sparse requires GNU
3268        let header = make_header(b"bad_sparse.txt", 0, b'S');
3269        let mut archive = Vec::new();
3270        archive.extend_from_slice(&header);
3271        archive.extend(zeroes(1024));
3272
3273        let mut parser = Parser::new(Limits::default());
3274        let err = parser.parse(&archive).unwrap_err();
3275        assert!(matches!(err, ParseError::SparseNotGnu));
3276    }
3277
3278    #[test]
3279    fn test_sparse_too_many_entries() {
3280        // Set a low limit and exceed it via extension blocks.
3281        let header = make_gnu_sparse_header(
3282            b"too_many.txt",
3283            &[(0, 10), (100, 10), (200, 10)],
3284            40,
3285            400,
3286            true,
3287        );
3288        // Extension block with 3 more entries → total 6
3289        let ext = make_gnu_ext_sparse(&[(300, 10)], false);
3290
3291        let mut archive = Vec::new();
3292        archive.extend_from_slice(&header);
3293        archive.extend_from_slice(&ext);
3294        archive.extend(zeroes(512));
3295        archive.extend(zeroes(1024));
3296
3297        let limits = Limits {
3298            max_sparse_entries: 3,
3299            ..Default::default()
3300        };
3301        let mut parser = Parser::new(limits);
3302        let err = parser.parse(&archive).unwrap_err();
3303        assert!(matches!(
3304            err,
3305            ParseError::TooManySparseEntries { count: 4, limit: 3 }
3306        ));
3307    }
3308
3309    #[test]
3310    fn test_sparse_with_gnu_long_name() {
3311        // GNU long name followed by a sparse entry — both extensions
3312        // should compose correctly.
3313        let long_name = "a/".to_string() + &"x".repeat(200);
3314
3315        let on_disk = 512u64;
3316        let real_size = 8192u64;
3317        let header = make_gnu_sparse_header(b"placeholder", &[(0, 512)], on_disk, real_size, false);
3318
3319        let mut archive = Vec::new();
3320        archive.extend(make_gnu_long_name(long_name.as_bytes()));
3321        archive.extend_from_slice(&header);
3322        archive.extend(zeroes(on_disk as usize)); // content
3323        archive.extend(zeroes(1024)); // end
3324
3325        let mut parser = Parser::new(Limits::default());
3326        let event = parser.parse(&archive).unwrap();
3327
3328        match event {
3329            ParseEvent::SparseEntry {
3330                entry,
3331                sparse_map,
3332                real_size: rs,
3333                ..
3334            } => {
3335                assert_eq!(entry.path.as_ref(), long_name.as_bytes());
3336                assert_eq!(rs, real_size);
3337                assert_eq!(sparse_map.len(), 1);
3338                assert_eq!(sparse_map[0].length, 512);
3339            }
3340            other => panic!("Expected SparseEntry, got {other:?}"),
3341        }
3342    }
3343
3344    #[test]
3345    fn test_sparse_need_data_is_side_effect_free() {
3346        // Provide only the header (isextended=true) → NeedData.
3347        // Then provide the full archive → SparseEntry.
3348        // The parser should not have modified state from the first call.
3349        let header = make_gnu_sparse_header(b"retry.txt", &[(0, 100)], 200, 300, true);
3350        let ext = make_gnu_ext_sparse(&[(100, 100)], false);
3351
3352        let mut parser = Parser::new(Limits::default());
3353
3354        // First attempt: only header
3355        let event = parser.parse(&header).unwrap();
3356        assert!(matches!(event, ParseEvent::NeedData { .. }));
3357
3358        // Second attempt: full archive
3359        let mut full = Vec::new();
3360        full.extend_from_slice(&header);
3361        full.extend_from_slice(&ext);
3362        full.extend(zeroes(512)); // content
3363        full.extend(zeroes(1024)); // end
3364
3365        let event = parser.parse(&full).unwrap();
3366        match event {
3367            ParseEvent::SparseEntry {
3368                consumed,
3369                sparse_map,
3370                ..
3371            } => {
3372                assert_eq!(consumed, 2 * HEADER_SIZE);
3373                assert_eq!(sparse_map.len(), 2);
3374            }
3375            other => panic!("Expected SparseEntry, got {other:?}"),
3376        }
3377    }
3378
3379    // =========================================================================
3380    // PAX sparse tests
3381    // =========================================================================
3382
3383    #[test]
3384    fn test_pax_sparse_v01_map() {
3385        // PAX v0.1: GNU.sparse.map as comma-separated offset,length pairs
3386        let mut archive = Vec::new();
3387        archive.extend(make_pax_header(&[
3388            ("GNU.sparse.map", b"0,100,200,100,400,50"),
3389            ("GNU.sparse.realsize", b"450"),
3390            ("GNU.sparse.name", b"real_name.txt"),
3391        ]));
3392        // The actual file header — 250 bytes of on-disk data
3393        archive.extend_from_slice(&make_header(b"placeholder.txt", 250, b'0'));
3394        archive.extend(zeroes(512)); // content (250 bytes padded)
3395        archive.extend(zeroes(1024)); // end of archive
3396
3397        let mut parser = Parser::new(Limits::default());
3398        let event = parser.parse(&archive).unwrap();
3399
3400        match event {
3401            ParseEvent::SparseEntry {
3402                entry,
3403                sparse_map,
3404                real_size,
3405                ..
3406            } => {
3407                assert_eq!(entry.path.as_ref(), b"real_name.txt");
3408                assert_eq!(real_size, 450);
3409                assert_eq!(sparse_map.len(), 3);
3410                assert_eq!(
3411                    sparse_map[0],
3412                    SparseEntry {
3413                        offset: 0,
3414                        length: 100
3415                    }
3416                );
3417                assert_eq!(
3418                    sparse_map[1],
3419                    SparseEntry {
3420                        offset: 200,
3421                        length: 100
3422                    }
3423                );
3424                assert_eq!(
3425                    sparse_map[2],
3426                    SparseEntry {
3427                        offset: 400,
3428                        length: 50
3429                    }
3430                );
3431            }
3432            other => panic!("Expected SparseEntry, got {other:?}"),
3433        }
3434    }
3435
3436    #[test]
3437    fn test_pax_sparse_v00_pairs() {
3438        // PAX v0.0: repeated GNU.sparse.offset / GNU.sparse.numbytes pairs
3439        let mut archive = Vec::new();
3440        archive.extend(make_pax_header(&[
3441            ("GNU.sparse.offset", b"0"),
3442            ("GNU.sparse.numbytes", b"100"),
3443            ("GNU.sparse.offset", b"1024"),
3444            ("GNU.sparse.numbytes", b"200"),
3445            ("GNU.sparse.realsize", b"1224"),
3446            ("GNU.sparse.name", b"v00_sparse.dat"),
3447        ]));
3448        archive.extend_from_slice(&make_header(b"placeholder", 300, b'0'));
3449        archive.extend(zeroes(512)); // content
3450        archive.extend(zeroes(1024)); // end
3451
3452        let mut parser = Parser::new(Limits::default());
3453        let event = parser.parse(&archive).unwrap();
3454
3455        match event {
3456            ParseEvent::SparseEntry {
3457                entry,
3458                sparse_map,
3459                real_size,
3460                ..
3461            } => {
3462                assert_eq!(entry.path.as_ref(), b"v00_sparse.dat");
3463                assert_eq!(real_size, 1224);
3464                assert_eq!(sparse_map.len(), 2);
3465                assert_eq!(
3466                    sparse_map[0],
3467                    SparseEntry {
3468                        offset: 0,
3469                        length: 100
3470                    }
3471                );
3472                assert_eq!(
3473                    sparse_map[1],
3474                    SparseEntry {
3475                        offset: 1024,
3476                        length: 200
3477                    }
3478                );
3479            }
3480            other => panic!("Expected SparseEntry, got {other:?}"),
3481        }
3482    }
3483
3484    #[test]
3485    fn test_pax_sparse_v10_data_prefix() {
3486        // PAX v1.0: sparse map in data block prefix
3487        let mut archive = Vec::new();
3488        archive.extend(make_pax_header(&[
3489            ("GNU.sparse.major", b"1"),
3490            ("GNU.sparse.minor", b"0"),
3491            ("GNU.sparse.realsize", b"2048"),
3492            ("GNU.sparse.name", b"v10_sparse.bin"),
3493        ]));
3494
3495        // The data block prefix contains the sparse map:
3496        // "2\n0\n100\n1024\n200\n" = 20 bytes, padded to 512
3497        let sparse_data = b"2\n0\n100\n1024\n200\n";
3498        let on_disk_content = 300u64; // actual data bytes after the map
3499        let total_size = 512 + on_disk_content; // map prefix (padded) + content
3500
3501        archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));
3502        // Data: sparse map prefix (padded to 512) + actual content
3503        let mut data_block = vec![0u8; 512];
3504        data_block[..sparse_data.len()].copy_from_slice(sparse_data);
3505        archive.extend_from_slice(&data_block);
3506        archive.extend(zeroes(on_disk_content.next_multiple_of(512) as usize));
3507        archive.extend(zeroes(1024)); // end
3508
3509        let mut parser = Parser::new(Limits::default());
3510        let event = parser.parse(&archive).unwrap();
3511
3512        match event {
3513            ParseEvent::SparseEntry {
3514                consumed,
3515                entry,
3516                sparse_map,
3517                real_size,
3518            } => {
3519                assert_eq!(entry.path.as_ref(), b"v10_sparse.bin");
3520                assert_eq!(real_size, 2048);
3521                assert_eq!(sparse_map.len(), 2);
3522                assert_eq!(
3523                    sparse_map[0],
3524                    SparseEntry {
3525                        offset: 0,
3526                        length: 100
3527                    }
3528                );
3529                assert_eq!(
3530                    sparse_map[1],
3531                    SparseEntry {
3532                        offset: 1024,
3533                        length: 200
3534                    }
3535                );
3536                // entry.size is the on-disk content after the map prefix
3537                assert_eq!(entry.size, on_disk_content);
3538                // consumed includes: PAX header + its content + actual header
3539                // + sparse map prefix (512 bytes)
3540                let pax_hdr_size = archive.len()
3541                    - HEADER_SIZE // actual file header
3542                    - 512 // sparse map data
3543                    - on_disk_content.next_multiple_of(512) as usize
3544                    - 1024; // end
3545                let expected_consumed = pax_hdr_size + HEADER_SIZE + 512;
3546                assert_eq!(consumed, expected_consumed);
3547            }
3548            other => panic!("Expected SparseEntry, got {other:?}"),
3549        }
3550    }
3551
3552    #[test]
3553    fn test_pax_sparse_v10_need_data() {
3554        // PAX v1.0 with insufficient data for the sparse map prefix.
3555        let mut archive = Vec::new();
3556        archive.extend(make_pax_header(&[
3557            ("GNU.sparse.major", b"1"),
3558            ("GNU.sparse.minor", b"0"),
3559            ("GNU.sparse.realsize", b"100"),
3560            ("GNU.sparse.name", b"v10_need.txt"),
3561        ]));
3562
3563        // Provide the actual file header but NOT the data block.
3564        archive.extend_from_slice(&make_header(b"placeholder", 512, b'0'));
3565
3566        let mut parser = Parser::new(Limits::default());
3567        let event = parser.parse(&archive).unwrap();
3568
3569        assert!(
3570            matches!(event, ParseEvent::NeedData { .. }),
3571            "Expected NeedData, got {event:?}"
3572        );
3573    }
3574
3575    #[test]
3576    fn test_pax_sparse_v01_odd_map_values() {
3577        // GNU.sparse.map with odd number of values is an error
3578        let mut archive = Vec::new();
3579        archive.extend(make_pax_header(&[
3580            ("GNU.sparse.map", b"0,100,200"),
3581            ("GNU.sparse.realsize", b"300"),
3582        ]));
3583        archive.extend_from_slice(&make_header(b"file.txt", 100, b'0'));
3584        archive.extend(zeroes(512));
3585        archive.extend(zeroes(1024));
3586
3587        let mut parser = Parser::new(Limits::default());
3588        let err = parser.parse(&archive).unwrap_err();
3589        assert!(matches!(err, ParseError::InvalidPaxSparseMap(_)));
3590    }
3591
3592    #[test]
3593    fn test_pax_sparse_v10_too_many_entries() {
3594        let mut archive = Vec::new();
3595        archive.extend(make_pax_header(&[
3596            ("GNU.sparse.major", b"1"),
3597            ("GNU.sparse.minor", b"0"),
3598            ("GNU.sparse.realsize", b"100"),
3599            ("GNU.sparse.name", b"toomany.txt"),
3600        ]));
3601
3602        // Sparse map claims 1000 entries
3603        let sparse_data = b"1000\n";
3604        let total_size = 512u64;
3605        archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));
3606        let mut data_block = vec![0u8; 512];
3607        data_block[..sparse_data.len()].copy_from_slice(sparse_data);
3608        archive.extend_from_slice(&data_block);
3609        archive.extend(zeroes(1024));
3610
3611        let limits = Limits {
3612            max_sparse_entries: 100,
3613            ..Default::default()
3614        };
3615        let mut parser = Parser::new(limits);
3616        let err = parser.parse(&archive).unwrap_err();
3617        assert!(
3618            matches!(
3619                err,
3620                ParseError::TooManySparseEntries {
3621                    count: 1000,
3622                    limit: 100
3623                }
3624            ),
3625            "got: {err:?}"
3626        );
3627    }
3628
3629    #[test]
3630    fn test_pax_sparse_without_version_is_v00() {
3631        // PAX sparse data without version fields should be treated as v0.0
3632        // (offset/numbytes pairs), not routed to v1.0 handler.
3633        let mut archive = Vec::new();
3634        archive.extend(make_pax_header(&[
3635            ("GNU.sparse.offset", b"0"),
3636            ("GNU.sparse.numbytes", b"50"),
3637            ("GNU.sparse.realsize", b"50"),
3638        ]));
3639        archive.extend_from_slice(&make_header(b"noversion.txt", 50, b'0'));
3640        archive.extend(zeroes(512)); // content
3641        archive.extend(zeroes(1024)); // end
3642
3643        let mut parser = Parser::new(Limits::default());
3644        let event = parser.parse(&archive).unwrap();
3645
3646        match event {
3647            ParseEvent::SparseEntry {
3648                sparse_map,
3649                real_size,
3650                ..
3651            } => {
3652                assert_eq!(sparse_map.len(), 1);
3653                assert_eq!(
3654                    sparse_map[0],
3655                    SparseEntry {
3656                        offset: 0,
3657                        length: 50
3658                    }
3659                );
3660                assert_eq!(real_size, 50);
3661            }
3662            other => panic!("Expected SparseEntry, got {other:?}"),
3663        }
3664    }
3665
3666    // =========================================================================
3667    // Sparse proptests
3668    // =========================================================================
3669
3670    mod sparse_proptests {
3671        use super::*;
3672        use proptest::prelude::*;
3673
3674        /// Strategy for a sparse map: a sorted list of non-overlapping
3675        /// (offset, length) pairs with reasonable values.
3676        fn sparse_map_strategy(max_entries: usize) -> impl Strategy<Value = Vec<(u64, u64)>> {
3677            proptest::collection::vec((0u64..0x10_000, 1u64..0x1000), 0..=max_entries).prop_map(
3678                |raw| {
3679                    // Sort by offset, then deduplicate/separate so entries
3680                    // don't overlap.
3681                    let mut entries: Vec<(u64, u64)> = Vec::new();
3682                    let mut cursor = 0u64;
3683                    for (gap, length) in raw {
3684                        let offset = cursor.saturating_add(gap);
3685                        entries.push((offset, length));
3686                        cursor = offset.saturating_add(length);
3687                    }
3688                    entries
3689                },
3690            )
3691        }
3692
3693        proptest! {
3694            #[test]
3695            fn test_sparse_roundtrip_inline(
3696                entries in sparse_map_strategy(4),
3697                name_len in 1usize..50,
3698            ) {
3699                let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
3700                let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3701                let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3702
3703                let header = make_gnu_sparse_header(
3704                    &name,
3705                    &entries,
3706                    on_disk,
3707                    real_size,
3708                    false,
3709                );
3710
3711                let mut archive = Vec::new();
3712                archive.extend_from_slice(&header);
3713                archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3714                archive.extend(zeroes(1024));
3715
3716                let mut parser = Parser::new(Limits::default());
3717                let event = parser.parse(&archive).unwrap();
3718
3719                match event {
3720                    ParseEvent::SparseEntry {
3721                        consumed,
3722                        sparse_map,
3723                        real_size: rs,
3724                        entry,
3725                        ..
3726                    } => {
3727                        prop_assert_eq!(consumed, HEADER_SIZE);
3728                        prop_assert_eq!(&entry.path[..], &name[..]);
3729                        prop_assert_eq!(rs, real_size);
3730                        prop_assert_eq!(sparse_map.len(), entries.len());
3731                        for (i, &(off, len)) in entries.iter().enumerate() {
3732                            prop_assert_eq!(sparse_map[i].offset, off);
3733                            prop_assert_eq!(sparse_map[i].length, len);
3734                        }
3735                    }
3736                    other => {
3737                        return Err(proptest::test_runner::TestCaseError::fail(
3738                            format!("Expected SparseEntry, got {other:?}")));
3739                    }
3740                }
3741            }
3742
3743            #[test]
3744            fn test_sparse_roundtrip_extended(
3745                // 5..=25 entries forces at least one extension block
3746                entries in sparse_map_strategy(25).prop_filter(
3747                    "need >4 entries for extension",
3748                    |e| e.len() > 4
3749                ),
3750            ) {
3751                let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3752                let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3753
3754                // Split into inline (first 4) and extension blocks (21 per block)
3755                let (inline, rest) = entries.split_at(4);
3756                let header = make_gnu_sparse_header(
3757                    b"proptest_ext.bin",
3758                    inline,
3759                    on_disk,
3760                    real_size,
3761                    !rest.is_empty(),
3762                );
3763
3764                let mut archive = Vec::new();
3765                archive.extend_from_slice(&header);
3766
3767                // Emit extension blocks, 21 entries per block
3768                let chunks: Vec<&[(u64, u64)]> = rest.chunks(21).collect();
3769                for (i, chunk) in chunks.iter().enumerate() {
3770                    let is_last = i == chunks.len() - 1;
3771                    let ext = make_gnu_ext_sparse(chunk, !is_last);
3772                    archive.extend_from_slice(&ext);
3773                }
3774
3775                archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3776                archive.extend(zeroes(1024));
3777
3778                let mut parser = Parser::new(Limits::default());
3779                let event = parser.parse(&archive).unwrap();
3780
3781                match event {
3782                    ParseEvent::SparseEntry {
3783                        consumed,
3784                        sparse_map,
3785                        real_size: rs,
3786                        ..
3787                    } => {
3788                        let expected_blocks = 1 + chunks.len();
3789                        prop_assert_eq!(consumed, expected_blocks * HEADER_SIZE);
3790                        prop_assert_eq!(rs, real_size);
3791                        prop_assert_eq!(sparse_map.len(), entries.len());
3792                        for (i, &(off, len)) in entries.iter().enumerate() {
3793                            prop_assert_eq!(sparse_map[i].offset, off);
3794                            prop_assert_eq!(sparse_map[i].length, len);
3795                        }
3796                    }
3797                    other => {
3798                        return Err(proptest::test_runner::TestCaseError::fail(
3799                            format!("Expected SparseEntry, got {other:?}")));
3800                    }
3801                }
3802            }
3803
3804            #[test]
3805            fn test_sparse_need_data_then_retry(
3806                n_ext_entries in 1usize..10,
3807            ) {
3808                // Build a sparse file with extension blocks, feed partial
3809                // data first (just the header), verify NeedData, then feed
3810                // the full archive and verify success.
3811                let inline = [(0u64, 100), (200, 100), (400, 100), (600, 100)];
3812                let ext_entries: Vec<(u64, u64)> = (0..n_ext_entries)
3813                    .map(|i| (800 + i as u64 * 200, 100))
3814                    .collect();
3815                let total = 4 + n_ext_entries;
3816                let on_disk = total as u64 * 100;
3817                let real_size = ext_entries.last().map(|(o, l)| o + l).unwrap_or(800);
3818
3819                let header = make_gnu_sparse_header(
3820                    b"retry_ext.txt",
3821                    &inline,
3822                    on_disk,
3823                    real_size,
3824                    true,
3825                );
3826                let ext = make_gnu_ext_sparse(&ext_entries, false);
3827
3828                let mut parser = Parser::new(Limits::default());
3829
3830                // Partial: just the header
3831                let event = parser.parse(&header).unwrap();
3832                assert!(matches!(event, ParseEvent::NeedData { .. }));
3833
3834                // Full archive
3835                let mut full = Vec::new();
3836                full.extend_from_slice(&header);
3837                full.extend_from_slice(&ext);
3838                full.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3839                full.extend(zeroes(1024));
3840
3841                let event = parser.parse(&full).unwrap();
3842                match event {
3843                    ParseEvent::SparseEntry { sparse_map, .. } => {
3844                        prop_assert_eq!(sparse_map.len(), total);
3845                    }
3846                    other => {
3847                        return Err(proptest::test_runner::TestCaseError::fail(
3848                            format!("Expected SparseEntry, got {other:?}")));
3849                    }
3850                }
3851            }
3852
3853            // =================================================================
3854            // PAX sparse format proptests
3855            // =================================================================
3856
3857            #[test]
3858            fn test_pax_sparse_v00_roundtrip(
3859                entries in sparse_map_strategy(15),
3860                name_len in 1usize..50,
3861            ) {
3862                let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
3863                let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3864                let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3865
3866                let mut pax_kv: Vec<(&str, Vec<u8>)> = Vec::new();
3867                for &(offset, length) in &entries {
3868                    pax_kv.push(("GNU.sparse.offset", offset.to_string().into_bytes()));
3869                    pax_kv.push(("GNU.sparse.numbytes", length.to_string().into_bytes()));
3870                }
3871                pax_kv.push(("GNU.sparse.realsize", real_size.to_string().into_bytes()));
3872                pax_kv.push(("GNU.sparse.name", name.clone()));
3873
3874                let pax_refs: Vec<(&str, &[u8])> =
3875                    pax_kv.iter().map(|(k, v)| (*k, v.as_slice())).collect();
3876
3877                let mut archive = Vec::new();
3878                archive.extend(make_pax_header(&pax_refs));
3879                archive.extend_from_slice(&make_header(b"placeholder", on_disk, b'0'));
3880                archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3881                archive.extend(zeroes(1024));
3882
3883                let mut parser = Parser::new(Limits::default());
3884                let event = parser.parse(&archive).unwrap();
3885
3886                match event {
3887                    ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
3888                        prop_assert_eq!(&entry.path[..], &name[..]);
3889                        prop_assert_eq!(rs, real_size);
3890                        prop_assert_eq!(sparse_map.len(), entries.len());
3891                        for (i, &(off, len)) in entries.iter().enumerate() {
3892                            prop_assert_eq!(sparse_map[i].offset, off);
3893                            prop_assert_eq!(sparse_map[i].length, len);
3894                        }
3895                    }
3896                    ParseEvent::Entry { .. } if entries.is_empty() => {}
3897                    other => {
3898                        return Err(proptest::test_runner::TestCaseError::fail(
3899                            format!("Expected SparseEntry, got {other:?}")));
3900                    }
3901                }
3902            }
3903
3904            #[test]
3905            fn test_pax_sparse_v01_roundtrip(
3906                entries in sparse_map_strategy(15),
3907                name_len in 1usize..50,
3908            ) {
3909                let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
3910                let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3911                let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3912
3913                let map_str: String = entries
3914                    .iter()
3915                    .flat_map(|(o, l)| [o.to_string(), l.to_string()])
3916                    .collect::<Vec<_>>()
3917                    .join(",");
3918                let map_bytes = map_str.into_bytes();
3919                let rs_bytes = real_size.to_string().into_bytes();
3920
3921                let pax_refs: Vec<(&str, &[u8])> = vec![
3922                    ("GNU.sparse.map", &map_bytes),
3923                    ("GNU.sparse.realsize", &rs_bytes),
3924                    ("GNU.sparse.name", &name),
3925                ];
3926
3927                let mut archive = Vec::new();
3928                archive.extend(make_pax_header(&pax_refs));
3929                archive.extend_from_slice(&make_header(b"placeholder", on_disk, b'0'));
3930                archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3931                archive.extend(zeroes(1024));
3932
3933                let mut parser = Parser::new(Limits::default());
3934                let event = parser.parse(&archive).unwrap();
3935
3936                match event {
3937                    ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
3938                        prop_assert_eq!(&entry.path[..], &name[..]);
3939                        prop_assert_eq!(rs, real_size);
3940                        prop_assert_eq!(sparse_map.len(), entries.len());
3941                        for (i, &(off, len)) in entries.iter().enumerate() {
3942                            prop_assert_eq!(sparse_map[i].offset, off);
3943                            prop_assert_eq!(sparse_map[i].length, len);
3944                        }
3945                    }
3946                    ParseEvent::Entry { .. } if entries.is_empty() => {}
3947                    other => {
3948                        return Err(proptest::test_runner::TestCaseError::fail(
3949                            format!("Expected SparseEntry, got {other:?}")));
3950                    }
3951                }
3952            }
3953
3954            #[test]
3955            fn test_pax_sparse_v10_roundtrip(
3956                entries in sparse_map_strategy(20),
3957                name_len in 1usize..50,
3958            ) {
3959                let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
3960                let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3961                let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3962
3963                let mut map_data = format!("{}\n", entries.len());
3964                for &(offset, length) in &entries {
3965                    map_data.push_str(&format!("{offset}\n{length}\n"));
3966                }
3967                let map_bytes = map_data.into_bytes();
3968                let map_padded = map_bytes.len().next_multiple_of(HEADER_SIZE);
3969                let total_size = map_padded as u64 + on_disk;
3970                let rs_bytes = real_size.to_string().into_bytes();
3971
3972                let pax_refs: Vec<(&str, &[u8])> = vec![
3973                    ("GNU.sparse.major", b"1"),
3974                    ("GNU.sparse.minor", b"0"),
3975                    ("GNU.sparse.realsize", &rs_bytes),
3976                    ("GNU.sparse.name", &name),
3977                ];
3978
3979                let mut archive = Vec::new();
3980                archive.extend(make_pax_header(&pax_refs));
3981                archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));
3982                let mut data_block = vec![0u8; map_padded];
3983                data_block[..map_bytes.len()].copy_from_slice(&map_bytes);
3984                archive.extend_from_slice(&data_block);
3985                archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3986                archive.extend(zeroes(1024));
3987
3988                let mut parser = Parser::new(Limits::default());
3989                let event = parser.parse(&archive).unwrap();
3990
3991                match event {
3992                    ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
3993                        prop_assert_eq!(&entry.path[..], &name[..]);
3994                        prop_assert_eq!(rs, real_size);
3995                        prop_assert_eq!(entry.size, on_disk);
3996                        prop_assert_eq!(sparse_map.len(), entries.len());
3997                        for (i, &(off, len)) in entries.iter().enumerate() {
3998                            prop_assert_eq!(sparse_map[i].offset, off);
3999                            prop_assert_eq!(sparse_map[i].length, len);
4000                        }
4001                    }
4002                    other => {
4003                        return Err(proptest::test_runner::TestCaseError::fail(
4004                            format!("Expected SparseEntry, got {other:?}")));
4005                    }
4006                }
4007            }
4008        }
4009    }
4010
4011    /// Regression test: `add_consumed` must not overflow when chained
4012    /// extension headers declare very large sizes.
4013    ///
4014    /// With `Limits::permissive()` (`max_metadata_size = u32::MAX`),
4015    /// extension headers can declare sizes close to `u32::MAX`.  When
4016    /// `handle_extension` recurses and the inner call returns `NeedData`,
4017    /// `add_consumed(total_size)` is applied on unwind at each level.
4018    /// Before the fix, `min_bytes + n` used plain `+` and could overflow
4019    /// `usize` (especially on 32-bit targets).  The fix uses
4020    /// `saturating_add`.  This test verifies the parser returns `NeedData`
4021    /// (or an error) without panicking.
4022    #[test]
4023    fn test_add_consumed_no_overflow() {
4024        // First extension: a complete GNU long name ('L') with a small
4025        // payload so the parser can fully consume it and recurse.
4026        let long_name = b"a]long/path".to_vec();
4027        let gnu_entry = make_gnu_long_name(&long_name);
4028        let first_entry_size = gnu_entry.len(); // 1024 bytes (header + padded name)
4029
4030        // Second extension: a PAX ('x') header that declares a size close
4031        // to u32::MAX.  We only provide the header—not the content—so the
4032        // recursive call in handle_extension will return NeedData with
4033        // min_bytes ≈ pax_size + 512.  On unwind, add_consumed adds
4034        // first_entry_size, giving min_bytes ≈ pax_size + 512 + 1024.
4035        // On 32-bit this would overflow without saturating_add.
4036        let pax_size: u64 = u32::MAX as u64 - long_name.len() as u64 - 512;
4037        let pax_header = make_header(b"PaxHeaders/file", pax_size, b'x');
4038
4039        // Build input: complete GNU long name entry + PAX header only (no
4040        // PAX content).
4041        let mut input = Vec::with_capacity(first_entry_size + HEADER_SIZE);
4042        input.extend_from_slice(&gnu_entry);
4043        input.extend_from_slice(&pax_header);
4044
4045        let mut parser = Parser::new(Limits::permissive());
4046        let result = parser.parse(&input);
4047
4048        // The parser must not panic.  It should return NeedData (because the
4049        // PAX content is missing) or an error—both are acceptable.
4050        match result {
4051            Ok(ParseEvent::NeedData { min_bytes }) => {
4052                // min_bytes must be at least the PAX entry's total_size
4053                // (header + padded content), and must not have wrapped to
4054                // a small value due to overflow.
4055                assert!(
4056                    min_bytes > HEADER_SIZE,
4057                    "min_bytes should be large, got {min_bytes}"
4058                );
4059            }
4060            Err(_) => {
4061                // An error (e.g. metadata too large) is also acceptable;
4062                // the important thing is no panic from arithmetic overflow.
4063            }
4064            other => panic!(
4065                "Expected NeedData or Err for truncated extension chain, got {:?}",
4066                other
4067            ),
4068        }
4069    }
4070}