Skip to main content

tar_framing/
stream.rs

1//! Lossless, block-oriented tar streaming.
2//!
3//! This API emits one frame for each accepted non-terminator physical
4//! tar block and preserves each source block verbatim.
5//!
//! The following Mermaid diagram describes the state machine:
7//!
8//! ```mermaid
9//! ---
10//! config:
11//!   layout: elk
12//! ---
13//!
14//! stateDiagram-v2
15//!   state "AwaitingHeader (unclassified)" as Unclassified
16//!   state SelectFormat <<choice>>
17//!
18//!   [*] --> Unclassified
19//!   Unclassified --> AwaitingSecondZero: first zero block
20//!   Unclassified --> SelectFormat: first nonzero header
21//!   SelectFormat --> PosixHeader: ustar identity
22//!   SelectFormat --> GnuHeader: GNU identity
23//!   SelectFormat --> Failed: unsupported identity
24//!
25//!   state "POSIX-pax selected" as Pax {
26//!     direction TB
27//!     state "AwaitingHeader" as PosixBoundary
28//!     state PosixHeader <<choice>>
29//!     state "ReadingMember" as PosixMemberData
30//!
31//!     PosixBoundary --> PosixHeader: next header
32//!     PosixHeader --> ReadingPax: x or g
33//!     PosixHeader --> PosixMemberData: member data
34//!     PosixHeader --> PosixBoundary: empty member
35//!     ReadingPax --> AwaitingUstarHeader: local x payload complete
36//!     ReadingPax --> PosixBoundary: global g payload complete
37//!     AwaitingUstarHeader --> PosixMemberData: member data
38//!     AwaitingUstarHeader --> PosixBoundary: empty member
39//!     PosixMemberData --> PosixBoundary: payload complete
40//!   }
41//!
42//!   state "GNU selected" as Gnu {
43//!     direction TB
44//!     state "AwaitingHeader" as GnuBoundary
45//!     state GnuHeader <<choice>>
46//!     state "ReadingMember" as GnuMemberData
47//!
48//!     GnuBoundary --> GnuHeader: next header
49//!     GnuHeader --> ReadingGnu: L or K data
50//!     GnuHeader --> AwaitingGnuMember: empty L or K
51//!     GnuHeader --> GnuMemberData: member data
52//!     GnuHeader --> GnuBoundary: empty member
53//!     ReadingGnu --> AwaitingGnuMember: metadata payload complete
54//!     AwaitingGnuMember --> ReadingGnu: another L or K data
55//!     AwaitingGnuMember --> AwaitingGnuMember: another empty L or K
56//!     AwaitingGnuMember --> GnuMemberData: member data
57//!     AwaitingGnuMember --> GnuBoundary: empty member
58//!     GnuMemberData --> GnuBoundary: payload complete
59//!   }
60//!
61//! PosixBoundary --> AwaitingSecondZero: first zero block
62//! GnuBoundary --> AwaitingSecondZero: first zero block
63//! AwaitingSecondZero --> Complete: second zero block
64//! AwaitingSecondZero --> Failed: nonzero block or EOF
65//! Complete --> [*]
66//! Failed --> [*]
67//!
68//! note right of Failed
69//!   Validation, ordering, and family-mismatch
70//!   errors also enter Failed; arrows omitted.
71//! end note
72//! ```
73
74use std::{
75    future::poll_fn,
76    ops::Range,
77    pin::Pin,
78    sync::Arc,
79    task::{Context, Poll},
80};
81
82use tokio::io::{AsyncRead, ReadBuf};
83
84use crate::{
85    ArchiveFormat, BLOCK_SIZE, Block, DEFAULT_MAX_GLOBAL_PAX_EXTENSIONS_SIZE,
86    DEFAULT_MAX_GNU_EXTENSION_SIZE, DEFAULT_MAX_PAX_EXTENSION_SIZE, FrameError, FrameErrorInner,
87    GnuKind, HdrCharset, PaxError, PaxKind, PaxRecord, PaxState, PaxValue, UstarKind,
88    header::{
89        CHECKSUM_RANGE, GID_RANGE, GNAME_RANGE, GNU_IDENTITY, IDENTITY_RANGE, MODE_RANGE,
90        MTIME_RANGE, NAME_RANGE, PREFIX_RANGE, SIZE_RANGE, TYPEFLAG_OFFSET, UID_RANGE, UNAME_RANGE,
91        USTAR_IDENTITY, checksum, is_all_nul, parse_number, parse_octal,
92    },
93    pax::{GlobalPaxRecords, PaxRecords, SharedPaxRecords},
94};
95
/// A physical block paired with the stream position returned alongside it,
/// in `(position, block)` order.
type PositionedBlock = (u64, Block);
97
/// Represents a single non-terminator physical block in a tar stream.
///
/// Each variant wraps the frame type that carries the block's position and
/// lossless bytes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Frame {
    /// A local or global pax extended header block.
    Pax(PaxFrame),
    /// A GNU long-name or long-link extension header block.
    Gnu(GnuFrame),
    /// An ordinary POSIX-ustar or GNU member header block.
    Header(HeaderFrame),
    /// A payload block belonging to a pax extended header, a GNU metadata
    /// extension, or an ordinary member; see [`DataOwner`].
    Data(DataFrame),
}
110
/// A pax extended header block.
///
/// This frame describes the header block only; the extended header records
/// it announces are carried by payload blocks owned by [`DataOwner::Pax`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PaxFrame {
    /// The absolute byte position of this block in the source stream.
    pub position: u64,
    /// The lossless header block bytes.
    pub block: Block,
    /// Whether this header is local or global.
    pub kind: PaxKind,
    /// The number of bytes occupied by the extended header records.
    pub payload_size: u64,
}
123
/// A GNU metadata extension header block.
///
/// This frame describes the header block only; the metadata payload it
/// announces is carried by payload blocks owned by [`DataOwner::Gnu`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct GnuFrame {
    /// The absolute byte position of this block in the source stream.
    pub position: u64,
    /// The lossless header block bytes.
    pub block: Block,
    /// The GNU extension kind.
    pub kind: GnuKind,
    /// The number of metadata payload bytes following the header.
    pub payload_size: u64,
}
136
/// An ordinary physical member header block in the selected archive family.
///
/// PAX records remain on their physical payload frames. Use
/// [`crate::logical::TarReader`] for assembled member metadata.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HeaderFrame {
    /// The absolute byte position of this block in the source stream.
    pub position: u64,
    /// The lossless header block bytes.
    pub block: Block,
    /// The selected archive family of this member header.
    pub format: ArchiveFormat,
    /// The member type identified by the header.
    pub kind: UstarKind,
    /// The size encoded directly in the ustar or GNU member header field.
    pub declared_size: u64,
    /// The size after applying applicable pax `size` records.
    ///
    /// This is also the number of payload bytes for which data frames will be
    /// emitted. Member kinds that cannot carry payload are rejected when either
    /// their declared or effective size is nonzero.
    pub effective_size: u64,
    /// The parsed `mode` header field; `None` when the field was all NUL and
    /// that encoding was permitted.
    pub(crate) mode: Option<u64>,
    /// The parsed `uid` header field; `None` when the field was all NUL and
    /// that encoding was permitted.
    pub(crate) uid: Option<u64>,
    /// The parsed `gid` header field; `None` when the field was all NUL and
    /// that encoding was permitted.
    pub(crate) gid: Option<u64>,
    /// The parsed `mtime` header field; `None` when the field was all NUL and
    /// that encoding was permitted.
    pub(crate) mtime: Option<u64>,
}
164
165impl HeaderFrame {
166    fn ustar(
167        position: u64,
168        block: Block,
169        typeflag: u8,
170        declared_size: u64,
171        local_records: Option<&PaxRecords>,
172        global_records: Option<&GlobalPaxRecords>,
173        allow_all_nul_numeric_fields: bool,
174    ) -> Result<Self, FrameError> {
175        let kind = UstarKind::try_from_framed(position, typeflag)?;
176
177        // Some real-world pax writers encode absent ordinary-header metadata as
178        // all NULs. The compatibility policy may accept these empty fields; every
179        // populated fallback remains subject to strict validation.
180        let parse_numeric_field = |field, range: Range<usize>| {
181            Self::parse_numeric_field(
182                position,
183                ArchiveFormat::Pax,
184                field,
185                &block[range],
186                allow_all_nul_numeric_fields,
187            )
188        };
189        let mode = parse_numeric_field("mode", MODE_RANGE)?;
190        let uid = parse_numeric_field("uid", UID_RANGE)?;
191        let gid = parse_numeric_field("gid", GID_RANGE)?;
192        let mtime = parse_numeric_field("mtime", MTIME_RANGE)?;
193
194        let validate_string_field = |field: &'static str, bytes: &[u8]| {
195            if bytes.contains(&0) {
196                Ok(())
197            } else {
198                Err(FrameError::at(
199                    position,
200                    FrameErrorInner::UnterminatedUstarStringField { field },
201                ))
202            }
203        };
204        validate_string_field("uname", &block[UNAME_RANGE])?;
205        validate_string_field("gname", &block[GNAME_RANGE])?;
206
207        // POSIX pax deliberately leaves the representation of device numbers unspecified.
208        // We do not consume those fields, so devmajor and devminor remain opaque.
209
210        // Determine our member's actual (i.e. effective) size. This takes any pax `size`
211        // records into account, in addition to the normal header-declared size.
212        let effective_size = PaxState::effective_size(local_records, global_records).map_or(
213            Ok(declared_size),
214            |size| match size {
215                PaxValue::Value(size) => Ok(*size),
216                PaxValue::Deleted => Err(FrameError::deleted_pax_metadata(position, "size")),
217            },
218        )?;
219        validate_pax_member_size(position, kind, declared_size, effective_size)?;
220
221        Ok(Self {
222            position,
223            block,
224            format: ArchiveFormat::Pax,
225            kind,
226            declared_size,
227            effective_size,
228            mode,
229            uid,
230            gid,
231            mtime,
232        })
233    }
234
235    fn gnu(
236        position: u64,
237        block: Block,
238        typeflag: u8,
239        declared_size: u64,
240        require_link_kind: bool,
241        allow_all_nul_numeric_fields: bool,
242    ) -> Result<Self, FrameError> {
243        let kind = UstarKind::try_from_framed(position, typeflag)?;
244        if require_link_kind && !matches!(kind, UstarKind::HardLink | UstarKind::SymbolicLink) {
245            return Err(FrameError::unexpected_order(
246                position,
247                "hard-link or symbolic-link member after GNU long-link extension",
248                "non-link ordinary member",
249            ));
250        }
251        validate_gnu_member_size(position, kind, declared_size)?;
252        let parse_numeric_field = |field, range: Range<usize>| {
253            Self::parse_numeric_field(
254                position,
255                ArchiveFormat::Gnu,
256                field,
257                &block[range],
258                allow_all_nul_numeric_fields,
259            )
260        };
261        let mode = parse_numeric_field("mode", MODE_RANGE)?;
262        let uid = parse_numeric_field("uid", UID_RANGE)?;
263        let gid = parse_numeric_field("gid", GID_RANGE)?;
264        let mtime = parse_numeric_field("mtime", MTIME_RANGE)?;
265
266        Ok(Self {
267            position,
268            block,
269            format: ArchiveFormat::Gnu,
270            kind,
271            declared_size,
272            effective_size: declared_size,
273            mode,
274            uid,
275            gid,
276            mtime,
277        })
278    }
279
280    fn parse_numeric_field(
281        position: u64,
282        format: ArchiveFormat,
283        field: &'static str,
284        bytes: &[u8],
285        allow_all_nul_numeric_fields: bool,
286    ) -> Result<Option<u64>, FrameError> {
287        if allow_all_nul_numeric_fields && is_all_nul(bytes) {
288            return Ok(None);
289        }
290        parse_number(format, bytes).map(Some).ok_or_else(|| {
291            FrameError::at(
292                position,
293                FrameErrorInner::InvalidNumericField {
294                    field,
295                    found: bytes.to_vec(),
296                },
297            )
298        })
299    }
300
301    pub(crate) fn copy_header_path_into(&self, path: &mut Vec<u8>) {
302        path.clear();
303        let name = trim_nul(&self.block[NAME_RANGE]);
304        if self.format == ArchiveFormat::Gnu {
305            path.extend_from_slice(name);
306            return;
307        }
308        let prefix = trim_nul(&self.block[PREFIX_RANGE]);
309        if !prefix.is_empty() {
310            path.extend_from_slice(prefix);
311            path.push(b'/');
312        }
313        path.extend_from_slice(name);
314    }
315}
316
/// The payload entry to which a data block belongs.
///
/// Carried by [`DataFrame::owner`] so a consumer can tell metadata-extension
/// payload apart from member payload.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DataOwner {
    /// Payload bytes belonging to a pax extended header.
    Pax(PaxKind),
    /// Payload bytes belonging to a GNU metadata extension.
    Gnu(GnuKind),
    /// Payload bytes belonging to an ordinary archive member.
    Member,
}
327
/// A payload physical block.
///
/// This can be "real" data for e.g. a file member, or it can be the payload of a pax
/// or GNU header.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DataFrame {
    /// The absolute byte position of this block in the source stream.
    pub position: u64,
    /// The lossless payload block bytes, including any final padding.
    pub block: Block,
    /// The number of meaningful payload bytes in this block.
    ///
    /// Bytes of `block` past this length are the final padding mentioned
    /// above.
    pub len: usize,
    /// Whether this block carries metadata-extension or member data.
    pub owner: DataOwner,
    /// Parsed records completed by this final pax payload block.
    ///
    /// This is `Some` only for the last data block belonging to a local or
    /// global pax header; other payload data carries `None`.
    ///
    /// Kept private; read it through [`DataFrame::completed_pax_records`].
    completed_pax_records: Option<SharedPaxRecords>,
}
348
349impl DataFrame {
350    /// Returns parsed records completed by this final pax payload block.
351    ///
352    /// This returns `Some` only for the last data block belonging to a local
353    /// or global pax header.
354    pub fn completed_pax_records(&self) -> Option<&[PaxRecord]> {
355        self.completed_pax_records
356            .as_deref()
357            .map(PaxRecords::as_slice)
358    }
359
360    pub(crate) fn into_completed_pax_records(self) -> Option<SharedPaxRecords> {
361        self.completed_pax_records
362    }
363}
364
/// The parser phase required before the next physical frame can be emitted.
#[derive(Debug)]
pub(super) enum State {
    /// No payload is pending; accept a header or the first zero end marker.
    AwaitingHeader,
    /// Consume the payload blocks declared by a local or global pax header.
    ReadingPax {
        /// Whether the owning pax header is local or global.
        kind: PaxKind,
        // NOTE(review): presumably the position of the owning pax header
        // block; its use is outside this view.
        header_position: u64,
        // NOTE(review): presumably the payload bytes still to be consumed,
        // mirroring `ReadingMember::remaining` — confirm.
        remaining: u64,
        // NOTE(review): presumably accumulates the payload bytes for record
        // parsing — confirm against the pax reading path.
        payload: Vec<u8>,
    },
    /// A local pax header has completed; require its ordinary ustar header.
    AwaitingUstarHeader { records: SharedPaxRecords },
    /// Consume uninterpreted payload blocks for a GNU `L` or `K` extension.
    ReadingGnu {
        /// The GNU extension kind whose payload is being consumed.
        kind: GnuKind,
        // NOTE(review): presumably the payload bytes still to be consumed —
        // confirm.
        remaining: u64,
        /// The GNU extensions seen so far for the upcoming member.
        pending: PendingGnu,
    },
    /// GNU metadata is pending; accept another distinct extension or its member.
    AwaitingGnuMember { pending: PendingGnu },
    /// Consume the payload blocks declared for an ordinary member.
    ///
    /// `remaining` counts meaningful payload bytes still to be read; block
    /// padding is not included.
    ReadingMember { remaining: u64 },
    /// The first zero end marker was read; require the second zero block.
    AwaitingSecondZero,
    /// A valid two-block end marker was consumed; no further input is examined.
    Complete,
    /// An error has been emitted; subsequent polls return end-of-stream.
    Failed,
}
396
/// Flags recording which GNU metadata extensions are pending for the next
/// ordinary member.
// NOTE(review): the flags are set and read outside this view; the field
// meanings below are inferred from their names — confirm.
#[derive(Clone, Copy, Debug, Default)]
pub(super) struct PendingGnu {
    /// Presumably set once a GNU long-name extension has been seen.
    pub(super) long_name: bool,
    /// Presumably set once a GNU long-link extension has been seen.
    pub(super) long_link: bool,
}
402
/// Ordinary-member chunk storage retained across cancellation and API changes.
#[derive(Default)]
struct MemberChunk {
    /// Backing storage for the chunk; swapped with the caller's buffer when a
    /// chunk starts and again when the completed chunk is handed back.
    buffer: Vec<u8>,
    /// The stream position at which this chunk's first block begins.
    start_position: u64,
    /// The block-aligned number of bytes to read from the source.
    physical_len: usize,
    /// The number of member payload bytes within `physical_len`, i.e. the
    /// physical length minus any final padding.
    meaningful_len: usize,
    /// Progress of the pending chunk; `None` when no chunk is pending.
    state: Option<MemberChunkState>,
}
412
/// Progress of a pending ordinary-member chunk.
#[derive(Clone, Copy)]
enum MemberChunkState {
    /// The chunk is still being filled from the source.
    Reading {
        /// The member payload bytes that remained when the chunk was started.
        member_remaining: u64,
        /// The number of physical bytes read into the chunk buffer so far.
        filled: usize,
    },
    /// Every physical byte of the chunk has been read.
    Ready {
        /// The number of physical bytes already handed out block by block.
        delivered: usize,
    },
}
423
/// A strict stream of POSIX-pax or GNU frames sourced from an underlying reader.
pub struct TarStream<R> {
    /// Our current stream position.
    pub(super) position: u64,
    /// Our interior source.
    pub(super) inner: R,
    /// Staging storage for the physical block currently being read.
    pub(super) block: Block,
    /// The number of bytes of `block` filled so far; nonzero while a
    /// partially read block is retained.
    pub(super) block_len: usize,
    /// The selected archive family, once the first header has been read.
    pub(super) format: Option<ArchiveFormat>,
    /// The currently effective global pax records, if any.
    pub(super) global_pax_records: Option<GlobalPaxRecords>,
    /// The maximum size accepted for each pax extension.
    max_pax_extension_size: u64,
    /// The maximum cumulative size accepted for global pax extensions before
    /// one ordinary member.
    max_global_pax_extensions_size: u64,
    // NOTE(review): presumably the running total compared against
    // `max_global_pax_extensions_size`; it is updated outside this view.
    global_pax_extensions_size: u64,
    /// Whether wholly NUL `mode`, `uid`, `gid`, and `mtime` fields are accepted.
    allow_all_nul_numeric_fields: bool,
    /// The maximum size accepted for each GNU extension.
    max_gnu_extension_size: u64,
    /// Storage and progress for a pending ordinary-member chunk read.
    member_chunk: MemberChunk,
    /// The current parser phase.
    pub(super) state: State,
}
443
444impl<R> TarStream<R> {
445    /// Creates a new [`TarStream`] from the given reader.
446    pub fn new(reader: R) -> Self {
447        Self {
448            position: 0,
449            inner: reader,
450            block: [0; BLOCK_SIZE],
451            block_len: 0,
452            format: None,
453            global_pax_records: None,
454            max_pax_extension_size: DEFAULT_MAX_PAX_EXTENSION_SIZE,
455            max_global_pax_extensions_size: DEFAULT_MAX_GLOBAL_PAX_EXTENSIONS_SIZE,
456            global_pax_extensions_size: 0,
457            allow_all_nul_numeric_fields: true,
458            max_gnu_extension_size: DEFAULT_MAX_GNU_EXTENSION_SIZE,
459            member_chunk: MemberChunk::default(),
460            state: State::AwaitingHeader,
461        }
462    }
463
464    /// Sets the maximum size accepted for each subsequent pax extension.
465    ///
466    /// A local or global header that declares a larger payload is rejected
467    /// before its payload is consumed. Setting the maximum to zero rejects
468    /// every nonempty extension. Setting it to [`u64::MAX`] removes the
469    /// per-extension bound; global extensions remain subject to their
470    /// cumulative limit.
471    pub fn set_max_pax_extension_size(&mut self, max_pax_extension_size: u64) {
472        self.max_pax_extension_size = max_pax_extension_size;
473    }
474
475    /// Sets the maximum cumulative size accepted for global pax extensions
476    /// before one ordinary member.
477    ///
478    /// The total resets after each ordinary member. A global header that would
479    /// increase the pending total beyond this limit is rejected before its
480    /// payload is consumed. Setting the maximum to zero rejects every nonempty
481    /// global extension. Setting it to [`u64::MAX`] removes the cumulative
482    /// bound; each extension remains subject to its individual limit.
483    pub fn set_max_global_pax_extensions_size(&mut self, max_global_pax_extensions_size: u64) {
484        self.max_global_pax_extensions_size = max_global_pax_extensions_size;
485    }
486
487    /// Sets whether wholly NUL numeric metadata fields may be accepted.
488    ///
489    /// This compatibility option applies to `mode`, `uid`, `gid`, and `mtime`
490    /// in both pax/ustar and GNU ordinary member headers. It is enabled by
491    /// default. Disabling it requires each field to use a valid numeric encoding
492    /// for its archive family.
493    pub fn set_allow_all_nul_numeric_fields(&mut self, allow: bool) {
494        self.allow_all_nul_numeric_fields = allow;
495    }
496
497    /// Sets the maximum size accepted for each GNU extension.
498    ///
499    /// A GNU extension member that declares a larger payload is rejected before
500    /// its payload is consumed. Setting the maximum to zero rejects every nonempty
501    /// GNU extension member. Setting it to [`u64::MAX`] removes the per-extension bound.
502    pub fn set_max_gnu_extension_size(&mut self, max_gnu_extension_size: u64) {
503        self.max_gnu_extension_size = max_gnu_extension_size;
504    }
505
506    /// Returns the selected archive family after the first header is read.
507    pub fn format(&self) -> Option<ArchiveFormat> {
508        self.format
509    }
510}
511
512impl<R: AsyncRead + Unpin> TarStream<R> {
513    /// Returns the next non-terminator physical archive frame.
514    ///
515    /// Reaching the end of the archive returns [`None`]. A framing error fuses
516    /// this reader, so every subsequent call also returns [`None`]. Cancelling
517    /// this operation retains any partial block for the next call.
518    pub async fn next_frame(&mut self) -> Result<Option<Frame>, FrameError> {
519        poll_fn(|context| self.poll_next_frame(context)).await
520    }
521
522    /// Reads one ordinary-member payload block without constructing a [`Frame`].
523    ///
524    /// Returns the block's position, lossless bytes, and meaningful length.
525    pub(crate) async fn read_member_block(&mut self) -> Result<(u64, Block, usize), FrameError> {
526        if self.member_chunk.state.is_some() {
527            self.complete_member_chunk().await?;
528            return self.take_member_block_from_chunk();
529        }
530        let remaining = match &self.state {
531            State::ReadingMember { remaining } => *remaining,
532            _ => {
533                self.state = State::Failed;
534                return Err(FrameError::unexpected_order(
535                    self.position,
536                    "ordinary member payload",
537                    "parser state without member payload",
538                ));
539            }
540        };
541        let (position, block) = match poll_fn(|context| self.poll_read_block(context)).await {
542            Ok(Some(block)) => block,
543            Ok(None) => {
544                let error = self.handle_eof();
545                self.state = State::Failed;
546                return Err(error);
547            }
548            Err(error) => {
549                self.state = State::Failed;
550                return Err(error);
551            }
552        };
553        let meaningful_len = remaining.min(BLOCK_SIZE as u64) as usize;
554        self.state = member_payload_state(remaining - meaningful_len as u64);
555        Ok((position, block, meaningful_len))
556    }
557
558    /// Reads aligned ordinary-member payload blocks directly into `buffer`.
559    ///
560    /// This internal path preserves exact physical-block completion checks
561    /// while avoiding lossless [`Frame`] construction for chunk consumers.
562    pub(crate) async fn read_member_chunk(
563        &mut self,
564        buffer: &mut Vec<u8>,
565        target_len: usize,
566    ) -> Result<usize, FrameError> {
567        // A cancelled block read retains its partial physical block here. Finish
568        // and deliver it before starting a direct chunk so no bytes are lost.
569        if self.member_chunk.state.is_none() && self.block_len != 0 {
570            let (_, block, meaningful_len) = self.read_member_block().await?;
571            buffer.clear();
572            buffer.extend_from_slice(&block[..meaningful_len]);
573            return Ok(meaningful_len);
574        }
575        if self.member_chunk.state.is_none() {
576            self.start_member_chunk(buffer, target_len)?;
577        }
578        self.complete_member_chunk().await?;
579        self.take_member_chunk(buffer)
580    }
581
582    fn start_member_chunk(
583        &mut self,
584        buffer: &mut Vec<u8>,
585        target_len: usize,
586    ) -> Result<(), FrameError> {
587        let member_remaining = match &self.state {
588            State::ReadingMember { remaining } => *remaining,
589            _ => {
590                self.state = State::Failed;
591                return Err(FrameError::unexpected_order(
592                    self.position,
593                    "ordinary member payload",
594                    "parser state without member payload",
595                ));
596            }
597        };
598        if self.block_len != 0 {
599            self.state = State::Failed;
600            return Err(FrameError::unexpected_order(
601                self.position,
602                "aligned ordinary member payload",
603                "partially buffered physical block",
604            ));
605        }
606
607        let target_len = u64::try_from(target_len.max(BLOCK_SIZE)).map_err(|_| {
608            FrameError::arithmetic_overflow(self.position, "member payload chunk target length")
609        })?;
610        let physical_len = member_remaining
611            .min(target_len)
612            .div_ceil(BLOCK_SIZE as u64)
613            .checked_mul(BLOCK_SIZE as u64)
614            .ok_or_else(|| {
615                FrameError::arithmetic_overflow(
616                    self.position,
617                    "member payload chunk physical length",
618                )
619            })?;
620        let meaningful_len = member_remaining.min(physical_len);
621        let physical_len = usize::try_from(physical_len).map_err(|_| {
622            FrameError::arithmetic_overflow(self.position, "member payload chunk physical length")
623        })?;
624        let meaningful_len = usize::try_from(meaningful_len).map_err(|_| {
625            FrameError::arithmetic_overflow(self.position, "member payload chunk meaningful length")
626        })?;
627
628        // Move the caller's reusable allocation into persistent storage before
629        // reading so cancellation cannot discard partial bytes or progress.
630        self.member_chunk.buffer.clear();
631        std::mem::swap(buffer, &mut self.member_chunk.buffer);
632        if self.member_chunk.buffer.len() != physical_len {
633            self.member_chunk.buffer.resize(physical_len, 0);
634        }
635        self.member_chunk.start_position = self.position;
636        self.member_chunk.physical_len = physical_len;
637        self.member_chunk.meaningful_len = meaningful_len;
638        self.member_chunk.state = Some(MemberChunkState::Reading {
639            member_remaining,
640            filled: 0,
641        });
642        Ok(())
643    }
644
    /// Drives the pending member chunk until every physical byte is read.
    ///
    /// Returns immediately when the chunk is already `Ready`. Otherwise it
    /// reads from the inner source into the chunk buffer until `filled`
    /// reaches the chunk's physical length, then advances the stream
    /// position past the chunk, updates the parser state with the member
    /// payload that remains, and marks the chunk `Ready`.
    ///
    /// Read progress is stored in `self.member_chunk` after every successful
    /// read rather than in locals of this future.
    ///
    /// # Errors
    ///
    /// Every error path sets the parser state to `Failed` and clears the
    /// pending chunk: no chunk pending, position or length arithmetic
    /// overflow, an I/O error from the source, or end of input before the
    /// chunk is full.
    async fn complete_member_chunk(&mut self) -> Result<(), FrameError> {
        loop {
            let (member_remaining, filled) = match self.member_chunk.state {
                Some(MemberChunkState::Reading {
                    member_remaining,
                    filled,
                }) => (member_remaining, filled),
                Some(MemberChunkState::Ready { .. }) => return Ok(()),
                None => {
                    self.state = State::Failed;
                    return Err(FrameError::unexpected_order(
                        self.position,
                        "pending member payload chunk",
                        "parser state without a pending chunk",
                    ));
                }
            };
            let start_position = self.member_chunk.start_position;
            let physical_len = self.member_chunk.physical_len;
            let meaningful_len = self.member_chunk.meaningful_len;
            // The chunk is full: commit the position and parser state, then
            // expose the chunk as ready with nothing delivered yet.
            if filled == physical_len {
                self.position =
                    checked_position(start_position, physical_len).inspect_err(|_| {
                        self.state = State::Failed;
                        self.member_chunk.state = None;
                    })?;
                let remaining = member_remaining
                    .checked_sub(meaningful_len as u64)
                    .ok_or_else(|| {
                        self.state = State::Failed;
                        self.member_chunk.state = None;
                        FrameError::arithmetic_overflow(
                            start_position,
                            "remaining member payload length",
                        )
                    })?;
                self.state = member_payload_state(remaining);
                self.member_chunk.state = Some(MemberChunkState::Ready { delivered: 0 });
                return Ok(());
            }

            // Read into the unfilled tail of the chunk buffer and report how
            // many bytes this single poll produced.
            let read = match poll_fn(|context| {
                let mut read_buffer =
                    ReadBuf::new(&mut self.member_chunk.buffer[filled..physical_len]);
                match Pin::new(&mut self.inner).poll_read(context, &mut read_buffer) {
                    Poll::Pending => Poll::Pending,
                    Poll::Ready(Ok(())) => Poll::Ready(Ok(read_buffer.filled().len())),
                    Poll::Ready(Err(source)) => Poll::Ready(Err(source)),
                }
            })
            .await
            {
                Ok(read) => read,
                Err(source) => {
                    self.state = State::Failed;
                    self.member_chunk.state = None;
                    // Report the error at the exact byte reached, but leave the
                    // stream position on the last whole-block boundary.
                    let error_position = checked_position(start_position, filled)?;
                    self.position = checked_position(start_position, filled - filled % BLOCK_SIZE)?;
                    return Err(FrameError::at(
                        error_position,
                        FrameErrorInner::Io { source },
                    ));
                }
            };
            // A zero-byte read is end of input: either mid-block (incomplete
            // block) or on a block boundary (truncated member payload).
            if read == 0 {
                self.state = State::Failed;
                self.member_chunk.state = None;
                let partial_len = filled % BLOCK_SIZE;
                let completed_len = filled - partial_len;
                self.position = checked_position(start_position, completed_len)?;
                if partial_len != 0 {
                    return Err(FrameError::at(
                        self.position,
                        FrameErrorInner::IncompleteBlock { read: partial_len },
                    ));
                }
                let completed_len = u64::try_from(completed_len).map_err(|_| {
                    FrameError::arithmetic_overflow(
                        self.position,
                        "completed member payload chunk length",
                    )
                })?;
                // The third argument is the member payload not covered by the
                // whole blocks already read, saturating at zero.
                return Err(FrameError::truncated_payload(
                    self.position,
                    DataOwner::Member,
                    member_remaining - member_remaining.min(completed_len),
                ));
            }
            // Record the progress in the persistent chunk state before looping.
            if let Some(MemberChunkState::Reading { filled, .. }) = &mut self.member_chunk.state {
                *filled += read;
            }
        }
    }
738
739    fn take_member_chunk(&mut self, buffer: &mut Vec<u8>) -> Result<usize, FrameError> {
740        let Some(MemberChunkState::Ready { delivered }) = self.member_chunk.state.take() else {
741            self.state = State::Failed;
742            return Err(FrameError::unexpected_order(
743                self.position,
744                "completed member payload chunk",
745                "incomplete member payload chunk",
746            ));
747        };
748        let meaningful_len = self.member_chunk.meaningful_len;
749        let remaining_len = meaningful_len.checked_sub(delivered).ok_or_else(|| {
750            self.state = State::Failed;
751            FrameError::arithmetic_overflow(self.position, "undelivered member payload length")
752        })?;
753        if delivered != 0 {
754            self.member_chunk
755                .buffer
756                .copy_within(delivered..meaningful_len, 0);
757        }
758        self.member_chunk.buffer.truncate(remaining_len);
759        std::mem::swap(buffer, &mut self.member_chunk.buffer);
760        Ok(remaining_len)
761    }
762
763    fn take_member_block_from_chunk(&mut self) -> Result<(u64, Block, usize), FrameError> {
764        let Some(MemberChunkState::Ready { delivered }) = self.member_chunk.state else {
765            self.state = State::Failed;
766            return Err(FrameError::unexpected_order(
767                self.position,
768                "completed member payload chunk",
769                "incomplete member payload chunk",
770            ));
771        };
772        let start_position = self.member_chunk.start_position;
773        let physical_len = self.member_chunk.physical_len;
774        let total_meaningful_len = self.member_chunk.meaningful_len;
775        let position = checked_position(start_position, delivered).inspect_err(|_| {
776            self.state = State::Failed;
777            self.member_chunk.state = None;
778        })?;
779        let mut block = [0; BLOCK_SIZE];
780        block.copy_from_slice(&self.member_chunk.buffer[delivered..delivered + BLOCK_SIZE]);
781        let meaningful_len = total_meaningful_len
782            .checked_sub(delivered)
783            .ok_or_else(|| {
784                self.state = State::Failed;
785                self.member_chunk.state = None;
786                FrameError::arithmetic_overflow(self.position, "undelivered member payload length")
787            })?
788            .min(BLOCK_SIZE);
789        let delivered = delivered + BLOCK_SIZE;
790        if delivered == physical_len {
791            self.member_chunk.state = None;
792        } else {
793            self.member_chunk.state = Some(MemberChunkState::Ready { delivered });
794        }
795        Ok((position, block, meaningful_len))
796    }
797
798    fn poll_read_block(
799        &mut self,
800        cx: &mut Context<'_>,
801    ) -> Poll<Result<Option<PositionedBlock>, FrameError>> {
802        while self.block_len < BLOCK_SIZE {
803            let mut read_buf = ReadBuf::new(&mut self.block[self.block_len..]);
804            match Pin::new(&mut self.inner).poll_read(cx, &mut read_buf) {
805                Poll::Pending => return Poll::Pending,
806                Poll::Ready(Err(source)) => {
807                    return Poll::Ready(Err(FrameError::at(
808                        self.position + self.block_len as u64,
809                        FrameErrorInner::Io { source },
810                    )));
811                }
812                Poll::Ready(Ok(())) => {
813                    let read = read_buf.filled().len();
814                    if read == 0 {
815                        if self.block_len == 0 {
816                            return Poll::Ready(Ok(None));
817                        }
818                        return Poll::Ready(Err(FrameError::at(
819                            self.position,
820                            FrameErrorInner::IncompleteBlock {
821                                read: self.block_len,
822                            },
823                        )));
824                    }
825                    self.block_len += read;
826                }
827            }
828        }
829
830        let position = self.position;
831        self.position = self
832            .position
833            .checked_add(BLOCK_SIZE as u64)
834            .ok_or_else(|| FrameError::arithmetic_overflow(position, "stream position"))?;
835        self.block_len = 0;
836        let block = std::mem::replace(&mut self.block, [0; BLOCK_SIZE]);
837        Poll::Ready(Ok(Some((position, block))))
838    }
839
840    fn handle_eof(&mut self) -> FrameError {
841        let inner = match &self.state {
842            State::AwaitingHeader | State::AwaitingSecondZero => FrameErrorInner::MissingEndMarker,
843            State::ReadingPax {
844                kind, remaining, ..
845            } => FrameErrorInner::TruncatedPayload {
846                owner: DataOwner::Pax(*kind),
847                remaining: *remaining,
848            },
849            State::AwaitingUstarHeader { .. } => FrameErrorInner::UnexpectedEof {
850                expected: "ordinary ustar member header after a local pax header",
851            },
852            State::ReadingGnu {
853                kind, remaining, ..
854            } => FrameErrorInner::TruncatedPayload {
855                owner: DataOwner::Gnu(*kind),
856                remaining: *remaining,
857            },
858            State::AwaitingGnuMember { .. } => FrameErrorInner::UnexpectedEof {
859                expected: "ordinary GNU member header after a GNU metadata extension",
860            },
861            State::ReadingMember { remaining } => FrameErrorInner::TruncatedPayload {
862                owner: DataOwner::Member,
863                remaining: *remaining,
864            },
865            State::Complete | State::Failed => FrameErrorInner::UnexpectedEof {
866                expected: "no further input",
867            },
868        };
869        FrameError::at(self.position, inner)
870    }
871
872    fn process_block(&mut self, position: u64, block: Block) -> Result<Option<Frame>, FrameError> {
873        let state = std::mem::replace(&mut self.state, State::Failed);
874        match state {
875            State::AwaitingHeader => {
876                if is_zero_block(&block) {
877                    self.state = State::AwaitingSecondZero;
878                    Ok(None)
879                } else {
880                    self.process_boundary_header(position, block).map(Some)
881                }
882            }
883            State::ReadingPax {
884                kind,
885                header_position,
886                mut remaining,
887                mut payload,
888            } => {
889                let len = remaining.min(BLOCK_SIZE as u64) as usize;
890                payload.extend_from_slice(&block[..len]);
891                remaining -= len as u64;
892                let completed_pax_records = if remaining == 0 {
893                    let records = Arc::new(
894                        PaxRecords::parse(
895                            &payload,
896                            self.global_pax_records
897                                .as_ref()
898                                .map_or(HdrCharset::Utf8, GlobalPaxRecords::hdrcharset),
899                        )
900                        .map_err(|source| {
901                            FrameError::invalid_pax_record(header_position, source)
902                        })?,
903                    );
904                    match kind {
905                        PaxKind::Local => {
906                            self.state = State::AwaitingUstarHeader {
907                                records: records.clone(),
908                            };
909                        }
910                        PaxKind::Global => {
911                            records.apply_global(&mut self.global_pax_records);
912                            self.state = State::AwaitingHeader;
913                        }
914                    }
915                    Some(records)
916                } else {
917                    self.state = State::ReadingPax {
918                        kind,
919                        header_position,
920                        remaining,
921                        payload,
922                    };
923                    None
924                };
925                Ok(Some(Frame::Data(DataFrame {
926                    position,
927                    block,
928                    len,
929                    owner: DataOwner::Pax(kind),
930                    completed_pax_records,
931                })))
932            }
933            State::AwaitingUstarHeader { records } => {
934                if is_zero_block(&block) {
935                    return Err(FrameError::unexpected_order(
936                        position,
937                        "ordinary ustar member header after a local pax header",
938                        "end-of-archive marker",
939                    ));
940                }
941                let parsed = self.parse_format_checked_header(position, &block)?;
942                if matches!(parsed.typeflag, b'x' | b'g') {
943                    return Err(FrameError::unexpected_order(
944                        position,
945                        "ordinary ustar member header after a local pax header",
946                        "another pax extended header",
947                    ));
948                }
949                self.process_ustar_header(position, block, parsed, Some(records))
950                    .map(Some)
951            }
952            State::ReadingGnu {
953                kind,
954                mut remaining,
955                pending,
956            } => {
957                let len = remaining.min(BLOCK_SIZE as u64) as usize;
958                remaining -= len as u64;
959                if remaining == 0 {
960                    self.state = State::AwaitingGnuMember { pending };
961                } else {
962                    self.state = State::ReadingGnu {
963                        kind,
964                        remaining,
965                        pending,
966                    };
967                }
968                Ok(Some(Frame::Data(DataFrame {
969                    position,
970                    block,
971                    len,
972                    owner: DataOwner::Gnu(kind),
973                    completed_pax_records: None,
974                })))
975            }
976            State::AwaitingGnuMember { pending } => {
977                if is_zero_block(&block) {
978                    return Err(FrameError::unexpected_order(
979                        position,
980                        "ordinary GNU member header after a GNU metadata extension",
981                        "end-of-archive marker",
982                    ));
983                }
984                let parsed = self.parse_format_checked_header(position, &block)?;
985                self.process_gnu_header(position, block, parsed, pending)
986                    .map(Some)
987            }
988            State::ReadingMember { mut remaining } => {
989                let len = remaining.min(BLOCK_SIZE as u64) as usize;
990                remaining -= len as u64;
991                self.state = member_payload_state(remaining);
992                Ok(Some(Frame::Data(DataFrame {
993                    position,
994                    block,
995                    len,
996                    owner: DataOwner::Member,
997                    completed_pax_records: None,
998                })))
999            }
1000            State::AwaitingSecondZero => {
1001                if !is_zero_block(&block) {
1002                    return Err(FrameError::at(position, FrameErrorInner::InvalidEndMarker));
1003                }
1004                self.state = State::Complete;
1005                Ok(None)
1006            }
1007            State::Complete => {
1008                self.state = State::Complete;
1009                Ok(None)
1010            }
1011            State::Failed => Ok(None),
1012        }
1013    }
1014
1015    fn process_boundary_header(
1016        &mut self,
1017        position: u64,
1018        block: Block,
1019    ) -> Result<Frame, FrameError> {
1020        let parsed = self.parse_format_checked_header(position, &block)?;
1021        match parsed.format {
1022            ArchiveFormat::Pax => self.process_pax_boundary_header(position, block, parsed),
1023            ArchiveFormat::Gnu => {
1024                self.process_gnu_header(position, block, parsed, PendingGnu::default())
1025            }
1026        }
1027    }
1028
1029    /// Parses a header and enforces the archive's single selected format.
1030    ///
1031    /// The first non-terminator header selects the format; later headers must
1032    /// decode as valid headers of that same family.
1033    fn parse_format_checked_header(
1034        &mut self,
1035        position: u64,
1036        block: &Block,
1037    ) -> Result<ParsedHeader, FrameError> {
1038        let parsed = ParsedHeader::try_from_framed(position, block)?;
1039        if let Some(expected) = self.format
1040            && parsed.format != expected
1041        {
1042            return Err(FrameError::at(
1043                position,
1044                FrameErrorInner::FormatMismatch {
1045                    expected,
1046                    found: parsed.format,
1047                },
1048            ));
1049        }
1050        self.format.get_or_insert(parsed.format);
1051        Ok(parsed)
1052    }
1053
1054    /// Processes a pax/ustar header at an archive-member boundary, where a new
1055    /// pax extension or an ordinary ustar member may begin.
1056    ///
1057    /// Pax extension headers enter [`State::ReadingPax`]; ordinary ustar
1058    /// headers are delegated to [`Self::process_ustar_header`].
1059    fn process_pax_boundary_header(
1060        &mut self,
1061        position: u64,
1062        block: Block,
1063        parsed: ParsedHeader,
1064    ) -> Result<Frame, FrameError> {
1065        match parsed.typeflag {
1066            b'x' => self.process_pax_header(position, block, parsed.size, PaxKind::Local),
1067            b'g' => self.process_pax_header(position, block, parsed.size, PaxKind::Global),
1068            _ => self.process_ustar_header(position, block, parsed, None),
1069        }
1070    }
1071
1072    /// Emits a pax extension header and enters its payload-reading state.
1073    ///
1074    /// This is reached only from the POSIX boundary state, before any local
1075    /// pax records require an ordinary member header.
1076    fn process_pax_header(
1077        &mut self,
1078        position: u64,
1079        block: Block,
1080        payload_size: u64,
1081        kind: PaxKind,
1082    ) -> Result<Frame, FrameError> {
1083        if payload_size > self.max_pax_extension_size {
1084            return Err(FrameError::at(
1085                position,
1086                FrameErrorInner::ExtensionTooLarge {
1087                    format: ArchiveFormat::Pax,
1088                    size: payload_size,
1089                    limit: self.max_pax_extension_size,
1090                },
1091            ));
1092        }
1093        if kind == PaxKind::Global {
1094            let size = self
1095                .global_pax_extensions_size
1096                .checked_add(payload_size)
1097                .ok_or_else(|| {
1098                    FrameError::arithmetic_overflow(position, "global pax extension payload total")
1099                })?;
1100            if size > self.max_global_pax_extensions_size {
1101                return Err(FrameError::at(
1102                    position,
1103                    FrameErrorInner::GlobalPaxExtensionsTooLarge {
1104                        size,
1105                        limit: self.max_global_pax_extensions_size,
1106                    },
1107                ));
1108            }
1109            self.global_pax_extensions_size = size;
1110        }
1111        if payload_size == 0 {
1112            return Err(FrameError::invalid_pax_record(
1113                position,
1114                PaxError::InvalidRecords {
1115                    reason: "extended header payload contains no records",
1116                },
1117            ));
1118        }
1119        self.state = State::ReadingPax {
1120            kind,
1121            header_position: position,
1122            remaining: payload_size,
1123            payload: Vec::new(),
1124        };
1125        Ok(Frame::Pax(PaxFrame {
1126            position,
1127            block,
1128            kind,
1129            payload_size,
1130        }))
1131    }
1132
1133    /// Emits an ordinary ustar member header after applying pax size state.
1134    ///
1135    /// This handles both bare members and members required by
1136    /// [`State::AwaitingUstarHeader`], then enters member data reading when
1137    /// the effective member size requires payload blocks.
1138    fn process_ustar_header(
1139        &mut self,
1140        position: u64,
1141        block: Block,
1142        parsed: ParsedHeader,
1143        local_pax_records: Option<SharedPaxRecords>,
1144    ) -> Result<Frame, FrameError> {
1145        let frame = HeaderFrame::ustar(
1146            position,
1147            block,
1148            parsed.typeflag,
1149            parsed.size,
1150            local_pax_records.as_deref(),
1151            self.global_pax_records.as_ref(),
1152            self.allow_all_nul_numeric_fields,
1153        )?;
1154        self.global_pax_extensions_size = 0;
1155        self.state = member_payload_state(frame.effective_size);
1156        Ok(Frame::Header(frame))
1157    }
1158
1159    fn process_gnu_header(
1160        &mut self,
1161        position: u64,
1162        block: Block,
1163        parsed: ParsedHeader,
1164        mut pending: PendingGnu,
1165    ) -> Result<Frame, FrameError> {
1166        let extension = match parsed.typeflag {
1167            b'L' => Some(GnuKind::LongName),
1168            b'K' => Some(GnuKind::LongLink),
1169            _ => None,
1170        };
1171        if let Some(kind) = extension {
1172            let already_seen = match kind {
1173                GnuKind::LongName => &mut pending.long_name,
1174                GnuKind::LongLink => &mut pending.long_link,
1175            };
1176            if *already_seen {
1177                return Err(FrameError::unexpected_order(
1178                    position,
1179                    "ordinary GNU member header or the other GNU metadata extension",
1180                    "duplicate GNU metadata extension",
1181                ));
1182            }
1183            if parsed.size > self.max_gnu_extension_size {
1184                return Err(FrameError::at(
1185                    position,
1186                    FrameErrorInner::ExtensionTooLarge {
1187                        format: ArchiveFormat::Gnu,
1188                        size: parsed.size,
1189                        limit: self.max_gnu_extension_size,
1190                    },
1191                ));
1192            }
1193            *already_seen = true;
1194            self.state = if parsed.size == 0 {
1195                State::AwaitingGnuMember { pending }
1196            } else {
1197                State::ReadingGnu {
1198                    kind,
1199                    remaining: parsed.size,
1200                    pending,
1201                }
1202            };
1203            return Ok(Frame::Gnu(GnuFrame {
1204                position,
1205                block,
1206                kind,
1207                payload_size: parsed.size,
1208            }));
1209        }
1210
1211        let frame = HeaderFrame::gnu(
1212            position,
1213            block,
1214            parsed.typeflag,
1215            parsed.size,
1216            pending.long_link,
1217            self.allow_all_nul_numeric_fields,
1218        )?;
1219        self.state = member_payload_state(frame.effective_size);
1220        Ok(Frame::Header(frame))
1221    }
1222
1223    fn poll_next_frame(
1224        &mut self,
1225        context: &mut Context<'_>,
1226    ) -> Poll<Result<Option<Frame>, FrameError>> {
1227        loop {
1228            if matches!(self.state, State::Complete | State::Failed) {
1229                return Poll::Ready(Ok(None));
1230            }
1231
1232            let (position, block) = match self.poll_read_block(context) {
1233                Poll::Pending => return Poll::Pending,
1234                Poll::Ready(Ok(Some(block))) => block,
1235                Poll::Ready(Ok(None)) => {
1236                    let error = self.handle_eof();
1237                    self.state = State::Failed;
1238                    return Poll::Ready(Err(error));
1239                }
1240                Poll::Ready(Err(error)) => {
1241                    self.state = State::Failed;
1242                    return Poll::Ready(Err(error));
1243                }
1244            };
1245
1246            match self.process_block(position, block) {
1247                Ok(Some(frame)) => return Poll::Ready(Ok(Some(frame))),
1248                Ok(None) => continue,
1249                Err(error) => {
1250                    self.state = State::Failed;
1251                    return Poll::Ready(Err(error));
1252                }
1253            }
1254        }
1255    }
1256}
1257
/// The header fields this file's framing logic reads after parsing a block:
/// the archive format, the typeflag byte, and the size.
1258struct ParsedHeader {
    /// Archive format chosen from the header's identity bytes.
1259    format: ArchiveFormat,
    /// Typeflag byte copied from the header as-is.
1260    typeflag: u8,
    /// Parsed value of the header's size field.
1261    size: u64,
1262}
1263
1264/// Converts raw tar input into a typed value while retaining source position
1265/// for any framing error produced by the conversion.
1266trait TryFromFramed<T>: Sized {
    /// Attempts the conversion of `source`; `position` is the stream offset
    /// that implementations attach to the returned [`FrameError`] on failure.
1267    fn try_from_framed(position: u64, source: T) -> Result<Self, FrameError>;
1268}
1269
1270fn is_zero_block(block: &Block) -> bool {
1271    block.iter().all(|byte| *byte == 0)
1272}
1273
/// Returns the prefix of `bytes` that precedes the first NUL byte, or all of
/// `bytes` when it contains no NUL.
fn trim_nul(bytes: &[u8]) -> &[u8] {
    // `split` always yields at least one (possibly empty) leading piece.
    bytes.split(|&byte| byte == 0).next().unwrap_or(bytes)
}
1281
1282fn member_payload_state(remaining: u64) -> State {
1283    if remaining == 0 {
1284        State::AwaitingHeader
1285    } else {
1286        State::ReadingMember { remaining }
1287    }
1288}
1289
1290fn checked_position(position: u64, len: usize) -> Result<u64, FrameError> {
1291    let len = u64::try_from(len)
1292        .map_err(|_| FrameError::arithmetic_overflow(position, "stream position"))?;
1293    position
1294        .checked_add(len)
1295        .ok_or_else(|| FrameError::arithmetic_overflow(position, "stream position"))
1296}
1297
1298impl TryFromFramed<&Block> for ParsedHeader {
1299    fn try_from_framed(position: u64, block: &Block) -> Result<Self, FrameError> {
1300        let format = match &block[IDENTITY_RANGE] {
1301            identity if identity == USTAR_IDENTITY => ArchiveFormat::Pax,
1302            identity if identity == GNU_IDENTITY => ArchiveFormat::Gnu,
1303            identity => {
1304                return Err(FrameError::at(
1305                    position,
1306                    FrameErrorInner::InvalidIdentity {
1307                        found: identity.try_into().expect("fixed header range"),
1308                    },
1309                ));
1310            }
1311        };
1312
1313        let actual_checksum = checksum(block);
1314        let expected_checksum = parse_octal(&block[CHECKSUM_RANGE]);
1315        if expected_checksum != Some(actual_checksum) {
1316            return Err(FrameError::at(
1317                position,
1318                FrameErrorInner::InvalidChecksum {
1319                    expected: expected_checksum,
1320                    actual: actual_checksum,
1321                },
1322            ));
1323        }
1324
1325        let size_bytes: [u8; 12] = block[SIZE_RANGE].try_into().expect("fixed header range");
1326        let size = parse_number(format, &size_bytes).ok_or_else(|| {
1327            FrameError::at(position, FrameErrorInner::InvalidSize { found: size_bytes })
1328        })?;
1329
1330        Ok(Self {
1331            format,
1332            typeflag: block[TYPEFLAG_OFFSET],
1333            size,
1334        })
1335    }
1336}
1337
1338impl TryFromFramed<u8> for UstarKind {
1339    fn try_from_framed(position: u64, typeflag: u8) -> Result<Self, FrameError> {
1340        match typeflag {
1341            0 | b'0' => Ok(Self::Regular),
1342            b'1' => Ok(Self::HardLink),
1343            b'2' => Ok(Self::SymbolicLink),
1344            b'3' => Ok(Self::CharacterDevice),
1345            b'4' => Ok(Self::BlockDevice),
1346            b'5' => Ok(Self::Directory),
1347            b'6' => Ok(Self::Fifo),
1348            b'7' => Ok(Self::Contiguous),
1349            _ => Err(FrameError::at(
1350                position,
1351                FrameErrorInner::UnsupportedTypeflag { typeflag },
1352            )),
1353        }
1354    }
1355}
1356
1357fn validate_pax_member_size(
1358    position: u64,
1359    kind: UstarKind,
1360    declared_size: u64,
1361    effective_size: u64,
1362) -> Result<(), FrameError> {
1363    match kind {
1364        // PAX permits a nonzero physical hardlink size and allows pax `size`
1365        // records to override it, so the effective size controls framing.
1366        // This is a broadening of what ustar allows; ustar requires
1367        // hardlink members to have `size=0`.
1368        UstarKind::Regular | UstarKind::HardLink | UstarKind::Contiguous => Ok(()),
1369        UstarKind::SymbolicLink
1370        | UstarKind::CharacterDevice
1371        | UstarKind::BlockDevice
1372        | UstarKind::Directory
1373        | UstarKind::Fifo => {
1374            // NOTE: Observe that we're strict about directory entries having
1375            // `size=0`, even though ustar/pax says that they may have a nonzero
1376            // size as an allocation hint (which, in turn, does not affect framing).
1377            // We do this to avoid a common differential where some parsers incorrectly
1378            // honor the directory entry's size during framing.
1379            // TODO: Make this configurable? Doing so seems very risky.
1380            validate_payload_free_size(position, kind, declared_size)?;
1381            validate_payload_free_size(position, kind, effective_size)
1382        }
1383    }
1384}
1385
1386fn validate_gnu_member_size(position: u64, kind: UstarKind, size: u64) -> Result<(), FrameError> {
1387    match kind {
1388        UstarKind::Regular | UstarKind::Contiguous => Ok(()),
1389        UstarKind::HardLink
1390        | UstarKind::SymbolicLink
1391        | UstarKind::CharacterDevice
1392        | UstarKind::BlockDevice
1393        | UstarKind::Directory
1394        | UstarKind::Fifo => validate_payload_free_size(position, kind, size),
1395    }
1396}
1397
1398fn validate_payload_free_size(position: u64, kind: UstarKind, size: u64) -> Result<(), FrameError> {
1399    if size == 0 {
1400        Ok(())
1401    } else {
1402        Err(FrameError::at(
1403            position,
1404            FrameErrorInner::InvalidMemberSize { kind, size },
1405        ))
1406    }
1407}
1408
1409#[cfg(test)]
1410mod tests {
1411    use std::{
1412        cell::Cell,
1413        pin::Pin,
1414        rc::Rc,
1415        task::{Context, Poll},
1416    };
1417
1418    use tokio::io::ReadBuf;
1419
1420    use super::*;
1421    use crate::{
1422        ArchiveFormat, FrameError, FrameErrorInner, HdrCharset, PaxString, PaxValue,
1423        header::{DEVMAJOR_RANGE, DEVMINOR_RANGE},
1424        test_support::{
1425            ChunkedReader, append_block, append_gnu, append_pax, append_payload, append_terminator,
1426            collect_frames, gnu_base256_header, gnu_header, header, ready, record, set_checksum,
1427        },
1428    };
1429
1430    fn collect(bytes: Vec<u8>, max_chunk: usize) -> Vec<Result<Frame, FrameError>> {
1431        ready(collect_frames(TarStream::new(ChunkedReader::new(
1432            bytes, max_chunk,
1433        ))))
1434    }
1435
1436    fn collect_with_max_pax_extension_size(
1437        bytes: Vec<u8>,
1438        max_chunk: usize,
1439        max_pax_extension_size: u64,
1440    ) -> Vec<Result<Frame, FrameError>> {
1441        let mut stream = TarStream::new(ChunkedReader::new(bytes, max_chunk));
1442        stream.set_max_pax_extension_size(max_pax_extension_size);
1443        ready(collect_frames(stream))
1444    }
1445
1446    fn header_frame(frames: &[Result<Frame, FrameError>], index: usize) -> &HeaderFrame {
1447        let Ok(Frame::Header(frame)) = &frames[index] else {
1448            panic!("expected header frame");
1449        };
1450        frame
1451    }
1452
1453    fn data_frame(frames: &[Result<Frame, FrameError>], index: usize) -> &DataFrame {
1454        let Ok(Frame::Data(frame)) = &frames[index] else {
1455            panic!("expected data frame");
1456        };
1457        frame
1458    }
1459
1460    fn last_error(frames: &[Result<Frame, FrameError>]) -> &FrameError {
1461        frames
1462            .last()
1463            .expect("stream should emit an item")
1464            .as_ref()
1465            .expect_err("last item should be an error")
1466    }
1467
1468    fn last_error_inner(frames: &[Result<Frame, FrameError>]) -> &FrameErrorInner {
1469        &last_error(frames).inner
1470    }
1471
    /// In-memory test reader that records, in a shared counter, how many
    /// bytes have been handed out through `poll_read`.
1472    struct CountingReader {
        /// Full input served by the reader.
1473        bytes: Vec<u8>,
        /// Offset into `bytes` of the next byte to serve.
1474        position: usize,
        /// Running count of bytes served; shared so a test can still read it
        /// after the reader has been moved elsewhere.
1475        consumed: Rc<Cell<usize>>,
1476    }
1477
1478    impl AsyncRead for CountingReader {
1479        fn poll_read(
1480            mut self: Pin<&mut Self>,
1481            _context: &mut Context<'_>,
1482            buffer: &mut ReadBuf<'_>,
1483        ) -> Poll<std::io::Result<()>> {
1484            let len = buffer
1485                .remaining()
1486                .min(self.bytes.len().saturating_sub(self.position));
1487            let end = self.position + len;
1488            buffer.put_slice(&self.bytes[self.position..end]);
1489            self.position = end;
1490            self.consumed.set(self.consumed.get() + len);
1491            Poll::Ready(Ok(()))
1492        }
1493    }
1494
    /// Class of header error a test case expects, carrying only the detail
    /// that `matches` compares (field name or typeflag byte).
1495    #[derive(Clone, Copy)]
1496    enum ExpectedHeaderError {
        /// Expects `FrameErrorInner::InvalidIdentity`.
1497        InvalidIdentity,
        /// Expects `FrameErrorInner::InvalidChecksum`.
1498        InvalidChecksum,
        /// Expects `FrameErrorInner::InvalidSize`.
1499        InvalidSize,
        /// Expects `FrameErrorInner::InvalidNumericField` for the named field.
1500        InvalidNumericField(&'static str),
        /// Expects `FrameErrorInner::UnterminatedUstarStringField` for the
        /// named field.
1501        UnterminatedUstarStringField(&'static str),
        /// Expects `FrameErrorInner::UnsupportedTypeflag` with this typeflag.
1502        UnsupportedTypeflag(u8),
1503    }
1504
1505    impl ExpectedHeaderError {
1506        fn matches(self, error: &FrameErrorInner) -> bool {
1507            match (self, error) {
1508                (Self::InvalidIdentity, FrameErrorInner::InvalidIdentity { .. })
1509                | (Self::InvalidChecksum, FrameErrorInner::InvalidChecksum { .. })
1510                | (Self::InvalidSize, FrameErrorInner::InvalidSize { .. }) => true,
1511                (
1512                    Self::InvalidNumericField(field),
1513                    FrameErrorInner::InvalidNumericField { field: found, .. },
1514                )
1515                | (
1516                    Self::UnterminatedUstarStringField(field),
1517                    FrameErrorInner::UnterminatedUstarStringField { field: found },
1518                ) => field == *found,
1519                (
1520                    Self::UnsupportedTypeflag(typeflag),
1521                    FrameErrorInner::UnsupportedTypeflag { typeflag: found },
1522                ) => typeflag == *found,
1523                _ => false,
1524            }
1525        }
1526    }
1527
1528    fn checksummed_header(mutate: impl FnOnce(&mut Block)) -> Block {
1529        let mut block = header(b'0', 0);
1530        mutate(&mut block);
1531        set_checksum(&mut block);
1532        block
1533    }
1534
1535    fn invalid_header_cases() -> Vec<(&'static str, Block, ExpectedHeaderError)> {
1536        let mut bad_magic = header(b'0', 0);
1537        bad_magic[IDENTITY_RANGE.start] = b'g';
1538        let mut bad_version = header(b'0', 0);
1539        bad_version[IDENTITY_RANGE.end - 2..IDENTITY_RANGE.end].copy_from_slice(b"  ");
1540        let mut bad_checksum = header(b'0', 0);
1541        bad_checksum[0] = b'X';
1542
1543        vec![
1544            ("magic", bad_magic, ExpectedHeaderError::InvalidIdentity),
1545            ("version", bad_version, ExpectedHeaderError::InvalidIdentity),
1546            (
1547                "checksum",
1548                bad_checksum,
1549                ExpectedHeaderError::InvalidChecksum,
1550            ),
1551            (
1552                "octal size",
1553                checksummed_header(|block| {
1554                    block[SIZE_RANGE].copy_from_slice(b"00000000008\0");
1555                }),
1556                ExpectedHeaderError::InvalidSize,
1557            ),
1558            (
1559                "base256 size",
1560                checksummed_header(|block| block[SIZE_RANGE.start] = 0x80),
1561                ExpectedHeaderError::InvalidSize,
1562            ),
1563            (
1564                "octal mode",
1565                checksummed_header(|block| {
1566                    block[MODE_RANGE].copy_from_slice(b"0000080\0");
1567                }),
1568                ExpectedHeaderError::InvalidNumericField("mode"),
1569            ),
1570            (
1571                "uid",
1572                checksummed_header(|block| {
1573                    block[UID_RANGE].copy_from_slice(b"invalid\0");
1574                }),
1575                ExpectedHeaderError::InvalidNumericField("uid"),
1576            ),
1577            (
1578                "gid",
1579                checksummed_header(|block| block[GID_RANGE.start] = b'8'),
1580                ExpectedHeaderError::InvalidNumericField("gid"),
1581            ),
1582            (
1583                "mtime",
1584                checksummed_header(|block| {
1585                    block[MTIME_RANGE].copy_from_slice(b"00000000008\0");
1586                }),
1587                ExpectedHeaderError::InvalidNumericField("mtime"),
1588            ),
1589            (
1590                "uname",
1591                checksummed_header(|block| block[UNAME_RANGE].fill(b'u')),
1592                ExpectedHeaderError::UnterminatedUstarStringField("uname"),
1593            ),
1594            (
1595                "gname",
1596                checksummed_header(|block| block[GNAME_RANGE].fill(b'g')),
1597                ExpectedHeaderError::UnterminatedUstarStringField("gname"),
1598            ),
1599            (
1600                "POSIX typeflag",
1601                header(b'X', 0),
1602                ExpectedHeaderError::UnsupportedTypeflag(b'X'),
1603            ),
1604            (
1605                "GNU typeflag",
1606                header(b'L', 0),
1607                ExpectedHeaderError::UnsupportedTypeflag(b'L'),
1608            ),
1609        ]
1610    }
1611
/// A 513-byte regular member, collected with a chunk argument of 7, must produce a header
/// frame, one full data block and a one-byte tail, with no pax records on either data frame.
#[test]
fn frames_bare_member_across_fragmented_reads() {
    let mut archive = Vec::new();
    append_block(&mut archive, &header(b'0', 513));
    append_payload(&mut archive, &[b'a'; BLOCK_SIZE]);
    append_payload(&mut archive, b"b");
    append_terminator(&mut archive);

    let frames = collect(archive, 7);
    assert_eq!(frames.len(), 3);

    let member = header_frame(&frames, 0);
    assert_eq!(member.kind, UstarKind::Regular);
    assert_eq!(member.declared_size, 513);
    assert_eq!(member.effective_size, 513);

    let full_block = data_frame(&frames, 1);
    let tail = data_frame(&frames, 2);
    assert_eq!(full_block.len, BLOCK_SIZE);
    assert_eq!(tail.len, 1);
    assert_eq!(tail.owner, DataOwner::Member);
    for data in [full_block, tail] {
        assert!(data.completed_pax_records().is_none());
    }
}
1634
/// A local pax payload longer than one block is framed as two data blocks, only the final
/// one exposes the parsed records, and its `size=513` record replaces the header's size of 1.
#[test]
fn frames_multiblock_pax_records_and_applies_size_override() {
    let mut records = record("comment", &"x".repeat(BLOCK_SIZE));
    records.extend_from_slice(&record("size", "513"));
    assert!(records.len() > BLOCK_SIZE);

    let mut archive = Vec::new();
    append_pax(&mut archive, b'x', &records);
    append_block(&mut archive, &header(b'0', 1));
    append_payload(&mut archive, &[b'a'; BLOCK_SIZE]);
    append_payload(&mut archive, b"b");
    append_terminator(&mut archive);

    let frames = collect(archive, 19);
    assert_eq!(frames.len(), 6);

    match frames[0].as_ref().unwrap() {
        Frame::Pax(pax) => {
            assert_eq!(pax.kind, PaxKind::Local);
            assert_eq!(pax.payload_size, records.len() as u64);
        }
        _ => panic!("expected pax header"),
    }

    let leading = data_frame(&frames, 1);
    assert_eq!(leading.owner, DataOwner::Pax(PaxKind::Local));
    assert!(leading.completed_pax_records().is_none());

    let closing = data_frame(&frames, 2);
    assert_eq!(closing.owner, DataOwner::Pax(PaxKind::Local));
    let last_record = closing
        .completed_pax_records()
        .and_then(|parsed| parsed.last());
    assert_eq!(last_record, Some(&PaxRecord::Size(PaxValue::Value(513))));

    let member = header_frame(&frames, 3);
    assert_eq!(member.declared_size, 1);
    assert_eq!(member.effective_size, 513);

    let tail = data_frame(&frames, 5);
    assert_eq!(tail.len, 1);
}
1672
/// A pax header whose declared payload size exceeds the configured limit is rejected
/// as the only frame, at position 0, for both local (`x`) and global (`g`) extensions,
/// and likewise when the default limit is exceeded by one.
#[test]
fn rejects_oversized_pax_extensions_before_consuming_payload() {
    let mut payload = record("comment", "metadata");
    payload.extend_from_slice(&record("mtime", "1"));
    let declared_size = u64::try_from(payload.len()).expect("payload size should fit u64");
    // One byte below the payload size, so the extension is just over the limit.
    let tight_limit = declared_size - 1;

    for (case, typeflag) in [("local", b'x'), ("global", b'g')] {
        let mut bytes = Vec::new();
        append_pax(&mut bytes, typeflag, &payload);
        let frames = collect_with_max_pax_extension_size(bytes, BLOCK_SIZE, tight_limit);
        assert_eq!(frames.len(), 1, "{case}");
        // Label the failure with the case, matching the length assertion above.
        assert!(
            matches!(
                last_error(&frames),
                FrameError {
                    position: 0,
                    inner: FrameErrorInner::ExtensionTooLarge {
                        format: ArchiveFormat::Pax,
                        size,
                        limit,
                    },
                } if *size == declared_size && *limit == tight_limit
            ),
            "{case}: {frames:?}"
        );
    }

    let frames = collect(
        header(b'x', DEFAULT_MAX_PAX_EXTENSION_SIZE + 1).to_vec(),
        BLOCK_SIZE,
    );
    assert_eq!(frames.len(), 1);
    assert!(
        matches!(
            last_error(&frames),
            FrameError {
                position: 0,
                inner: FrameErrorInner::ExtensionTooLarge {
                    format: ArchiveFormat::Pax,
                    size,
                    limit: DEFAULT_MAX_PAX_EXTENSION_SIZE,
                },
            } if *size == DEFAULT_MAX_PAX_EXTENSION_SIZE + 1
        ),
        "default limit: {frames:?}"
    );
}
1713
/// With the pax extension limit set to 0, a one-byte local extension is rejected and the
/// `CountingReader`'s shared counter reads exactly `BLOCK_SIZE` afterwards.
#[test]
fn oversized_pax_extension_does_not_read_its_payload_block() {
    // Header block followed by one zero-filled block standing in for the payload.
    let mut archive = header(b'x', 1).to_vec();
    archive.resize(BLOCK_SIZE * 2, 0);

    let consumed = Rc::new(Cell::new(0));
    let mut stream = TarStream::new(CountingReader {
        bytes: archive,
        position: 0,
        consumed: Rc::clone(&consumed),
    });
    stream.set_max_pax_extension_size(0);

    assert!(matches!(
        ready(stream.next_frame()),
        Err(FrameError {
            position: 0,
            inner: FrameErrorInner::ExtensionTooLarge {
                format: ArchiveFormat::Pax,
                size: 1,
                limit: 0,
            },
        })
    ));
    assert_eq!(consumed.get(), BLOCK_SIZE);
}
1740
/// A pax payload whose size equals the configured limit is accepted for both local and
/// global extensions: every collected frame is `Ok`.
#[test]
fn accepts_pax_extensions_at_the_configured_limit() {
    let mut payload = record("comment", "metadata");
    payload.extend_from_slice(&record("ACME.attribute", "value"));
    let exact_limit = u64::try_from(payload.len()).expect("payload size should fit u64");

    // Only the local (`x`) case appends a member header after the extension.
    for (case, typeflag, followed_by_member) in [("local", b'x', true), ("global", b'g', false)] {
        let mut archive = Vec::new();
        append_pax(&mut archive, typeflag, &payload);
        if followed_by_member {
            append_block(&mut archive, &header(b'0', 0));
        }
        append_terminator(&mut archive);

        let frames = collect_with_max_pax_extension_size(archive, 7, exact_limit);
        assert!(frames.iter().all(|frame| frame.is_ok()), "{case}");
    }
}
1764
/// Walks one archive through three global extensions and one local extension and checks
/// the effective sizes seen by the two member headers, the records exposed on completed
/// payloads, and the `DeletedPaxMetadata` error raised after the global `size` deletion.
#[test]
fn applies_global_pax_records_overrides_and_rejects_size_deletions() {
    let initial_global = [record("comment", "old"), record("size", "2")].concat();
    let replacement_global = record("comment", "new");
    let local = [record("comment", "local"), record("size", "3")].concat();
    let deletion = [record("comment", ""), record("size", "")].concat();

    let mut archive = Vec::new();
    append_pax(&mut archive, b'g', &initial_global);
    append_pax(&mut archive, b'g', &replacement_global);
    append_block(&mut archive, &header(b'0', 1));
    append_payload(&mut archive, b"ab");
    append_pax(&mut archive, b'x', &local);
    append_block(&mut archive, &header(b'0', 1));
    append_payload(&mut archive, b"abc");
    append_pax(&mut archive, b'g', &deletion);
    append_block(&mut archive, &header(b'5', 1));
    append_terminator(&mut archive);

    let frames = collect(archive, 31);

    // At least one global pax header frame and one global pax data frame were emitted.
    let saw_global_header = frames.iter().flatten().any(|frame| {
        matches!(
            frame,
            Frame::Pax(PaxFrame {
                kind: PaxKind::Global,
                ..
            })
        )
    });
    assert!(saw_global_header);
    let saw_global_data = frames.iter().flatten().any(|frame| {
        matches!(
            frame,
            Frame::Data(DataFrame {
                owner: DataOwner::Pax(PaxKind::Global),
                ..
            })
        )
    });
    assert!(saw_global_data);

    // Records exposed by data frames that complete a global payload, in stream order.
    let mut completed_global_payloads: Vec<&[PaxRecord]> = Vec::new();
    for frame in frames.iter().flatten() {
        if let Frame::Data(data) = frame {
            if data.owner == DataOwner::Pax(PaxKind::Global) {
                completed_global_payloads.extend(data.completed_pax_records());
            }
        }
    }
    assert_eq!(completed_global_payloads.len(), 3);
    assert_eq!(
        completed_global_payloads[2],
        [
            PaxRecord::Comment(PaxValue::Deleted),
            PaxRecord::Size(PaxValue::Deleted),
        ]
    );

    let headers: Vec<&HeaderFrame> = frames
        .iter()
        .flatten()
        .filter_map(|frame| match frame {
            Frame::Header(member) => Some(member),
            _ => None,
        })
        .collect();
    assert_eq!(headers.len(), 2);
    assert_eq!(headers[0].effective_size, 2);
    assert_eq!(headers[1].effective_size, 3);

    let expected_local = local_records("local", 3);
    let saw_local_records = frames.iter().flatten().any(|frame| {
        matches!(
            frame,
            Frame::Data(data)
                if data.owner == DataOwner::Pax(PaxKind::Local)
                    && data.completed_pax_records() == Some(expected_local.as_slice())
        )
    });
    assert!(saw_local_records);

    assert!(matches!(
        last_error_inner(&frames),
        FrameErrorInner::DeletedPaxMetadata { keyword: "size" }
    ));
}
1842
1843    fn local_records(comment: &str, size: u64) -> Vec<PaxRecord> {
1844        vec![
1845            PaxRecord::Comment(PaxValue::Value(comment.into())),
1846            PaxRecord::Size(PaxValue::Value(size)),
1847        ]
1848    }
1849
/// A local extension that deletes `size` and then sets it to 2 is accepted: the member's
/// effective size is 2 and both records are exposed in their original order.
#[test]
fn allows_local_size_deletion_when_a_later_record_restores_size() {
    let local = [record("size", ""), record("size", "2")].concat();

    let mut archive = Vec::new();
    append_pax(&mut archive, b'x', &local);
    append_block(&mut archive, &header(b'0', 1));
    append_payload(&mut archive, b"ab");
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert_eq!(header_frame(&frames, 2).effective_size, 2);

    let expected_records = [
        PaxRecord::Size(PaxValue::Deleted),
        PaxRecord::Size(PaxValue::Value(2)),
    ];
    assert_eq!(
        data_frame(&frames, 1).completed_pax_records(),
        Some(expected_records.as_slice())
    );
}
1874
/// A pax record naming a field does not rescue a member header whose own copy of that
/// field is malformed: each case must still end in the listed header error.
#[test]
fn pax_records_do_not_make_malformed_ordinary_header_fields_valid() {
    /// Frames `pax extension + malformed header + terminator` and checks the final error.
    fn assert_still_rejected(
        case: &str,
        typeflag: u8,
        records: Vec<u8>,
        malformed: Block,
        expected: ExpectedHeaderError,
    ) {
        let mut archive = Vec::new();
        append_pax(&mut archive, typeflag, &records);
        append_block(&mut archive, &malformed);
        append_terminator(&mut archive);

        let frames = collect(archive, BLOCK_SIZE);
        assert!(
            expected.matches(last_error_inner(&frames)),
            "{case}: {frames:?}"
        );
    }

    assert_still_rejected(
        "local uid",
        b'x',
        record("uid", "1"),
        checksummed_header(|block| block[UID_RANGE].fill(b'u')),
        ExpectedHeaderError::InvalidNumericField("uid"),
    );
    assert_still_rejected(
        "global gid",
        b'g',
        record("gid", "2"),
        checksummed_header(|block| block[GID_RANGE].fill(b'g')),
        ExpectedHeaderError::InvalidNumericField("gid"),
    );
    assert_still_rejected(
        "local mtime",
        b'x',
        record("mtime", "3"),
        checksummed_header(|block| block[MTIME_RANGE].fill(b'm')),
        ExpectedHeaderError::InvalidNumericField("mtime"),
    );
    assert_still_rejected(
        "global uname",
        b'g',
        record("uname", "user"),
        checksummed_header(|block| block[UNAME_RANGE].fill(b'u')),
        ExpectedHeaderError::UnterminatedUstarStringField("uname"),
    );
    assert_still_rejected(
        "local gname",
        b'x',
        record("gname", "group"),
        checksummed_header(|block| block[GNAME_RANGE].fill(b'g')),
        ExpectedHeaderError::UnterminatedUstarStringField("gname"),
    );
}
1928
/// `parse_octal` returns `None` for both device fields of `header(b'0', 0)`, and an
/// archive holding just that header plus a terminator still frames without error.
#[test]
fn accepts_all_nul_unused_device_fields() {
    let member = header(b'0', 0);
    for device_field in [DEVMAJOR_RANGE, DEVMINOR_RANGE] {
        assert_eq!(parse_octal(&member[device_field]), None);
    }

    let mut archive = Vec::new();
    append_block(&mut archive, &member);
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert!(frames.iter().all(|frame| frame.is_ok()));
}
1940
/// A global `size=7` followed by a local `size` deletion ahead of a `b'5'` header with
/// declared size 3 ends in `DeletedPaxMetadata` for the `size` keyword.
#[test]
fn rejects_local_size_deletion_for_payload_free_members() {
    let mut archive = Vec::new();
    for (typeflag, records) in [(b'g', record("size", "7")), (b'x', record("size", ""))] {
        append_pax(&mut archive, typeflag, &records);
    }
    append_block(&mut archive, &header(b'5', 3));
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&frames),
        FrameErrorInner::DeletedPaxMetadata { keyword: "size" }
    ));
}
1956
/// A `size` deletion, whether local or global, followed by a regular-file header is
/// rejected with `DeletedPaxMetadata` for the `size` keyword.
#[test]
fn rejects_deleted_size_when_member_payload_cannot_be_framed() {
    let size_deletion = record("size", "");

    for typeflag in [b'x', b'g'] {
        let mut archive = Vec::new();
        append_pax(&mut archive, typeflag, &size_deletion);
        append_block(&mut archive, &header(b'0', 0));

        let frames = collect(archive, BLOCK_SIZE);
        let rejected = matches!(
            last_error_inner(&frames),
            FrameErrorInner::DeletedPaxMetadata { keyword: "size" }
        );
        assert!(rejected, "{typeflag:?}");
    }
}
1974
/// A global `size` deletion followed by a local `size=2` is accepted: the member's
/// effective size is 2 and each extension exposes its own single record.
#[test]
fn allows_local_size_to_restore_an_active_global_deletion() {
    let mut archive = Vec::new();
    append_pax(&mut archive, b'g', &record("size", ""));
    append_pax(&mut archive, b'x', &record("size", "2"));
    append_block(&mut archive, &header(b'0', 1));
    append_payload(&mut archive, b"ab");
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert_eq!(header_frame(&frames, 4).effective_size, 2);

    // Frame 1 completes the global payload, frame 3 the local one.
    let expected_by_frame = [
        (1, PaxRecord::Size(PaxValue::Deleted)),
        (3, PaxRecord::Size(PaxValue::Value(2))),
    ];
    for (index, expected) in expected_by_frame {
        assert_eq!(
            data_frame(&frames, index).completed_pax_records(),
            Some([expected].as_slice())
        );
    }
}
1998
/// A `b'1'` (hard link) member in a pax archive carries a three-byte body whether that
/// size comes from the header itself or from a local pax `size` record.
#[test]
fn frames_pax_hard_link_bodies_from_header_or_size_override() {
    let cases = [
        ("physical size", 3, None),
        ("pax size", 0, Some("3")),
        ("pax size overrides physical size", 1, Some("3")),
    ];

    for (case, declared_size, override_size) in cases {
        let mut archive = Vec::new();
        // With an override, the pax header and its data frame occupy indices 0 and 1.
        let header_index = match override_size {
            Some(size) => {
                append_pax(&mut archive, b'x', &record("size", size));
                2
            }
            None => 0,
        };
        append_block(&mut archive, &header(b'1', declared_size));
        append_payload(&mut archive, b"abc");
        append_terminator(&mut archive);

        let frames = collect(archive, BLOCK_SIZE);
        let link = header_frame(&frames, header_index);
        assert_eq!(link.format, ArchiveFormat::Pax, "{case}");
        assert_eq!(link.kind, UstarKind::HardLink, "{case}");
        assert_eq!(link.declared_size, declared_size, "{case}");
        assert_eq!(link.effective_size, 3, "{case}");
        assert_eq!(data_frame(&frames, header_index + 1).len, 3, "{case}");
    }
}
2023
/// An all-zero block that sits inside a member's declared payload is emitted as a data
/// frame, so the archive yields exactly a header frame and a data frame.
#[test]
fn zero_data_block_is_not_a_terminator() {
    let zero_payload = [0; BLOCK_SIZE];

    let mut archive = Vec::new();
    append_block(&mut archive, &header(b'0', BLOCK_SIZE as u64));
    append_block(&mut archive, &zero_payload);
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert_eq!(frames.len(), 2);
    // With exactly two frames, the last one is the frame at index 1.
    assert!(matches!(frames.last(), Some(Ok(Frame::Data(_)))));
}
2035
/// A local pax payload containing a long run of NUL bytes produces at least one
/// data frame owned by the local extension whose block is entirely zero.
#[test]
fn zero_filled_block_inside_pax_payload_is_data() {
    let nul_heavy_payload = record("comment", &"\0".repeat(BLOCK_SIZE * 3));

    let mut archive = Vec::new();
    append_pax(&mut archive, b'x', &nul_heavy_payload);
    append_block(&mut archive, &header(b'0', 0));
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    let saw_zero_pax_block = frames.iter().flatten().any(|frame| match frame {
        Frame::Data(data) => {
            data.owner == DataOwner::Pax(PaxKind::Local) && is_zero_block(&data.block)
        }
        _ => false,
    });
    assert!(saw_zero_pax_block);
}
2054
/// A GNU archive with a 513-byte `L` entry (size written by `gnu_base256_header`), a `K`
/// entry and a symbolic-link member yields six frames in that order.
#[test]
fn frames_gnu_long_metadata_and_base256_payloads() {
    let mut archive = Vec::new();
    append_block(&mut archive, &gnu_base256_header(b'L', 513));
    append_payload(&mut archive, &[b'n'; BLOCK_SIZE]);
    append_payload(&mut archive, b"\0");
    append_gnu(&mut archive, b'K', b"link\0");
    append_block(&mut archive, &gnu_header(b'2', 0));
    append_terminator(&mut archive);

    let frames = collect(archive, 13);
    assert_eq!(frames.len(), 6);

    let long_name = frames[0].as_ref().unwrap();
    assert!(matches!(
        long_name,
        Frame::Gnu(GnuFrame {
            kind: GnuKind::LongName,
            payload_size: 513,
            ..
        })
    ));

    // Second data block of the long name: the single trailing byte.
    let name_tail = data_frame(&frames, 2);
    assert_eq!(name_tail.owner, DataOwner::Gnu(GnuKind::LongName));
    assert_eq!(name_tail.len, 1);
    assert!(name_tail.completed_pax_records().is_none());

    let long_link = frames[3].as_ref().unwrap();
    assert!(matches!(
        long_link,
        Frame::Gnu(GnuFrame {
            kind: GnuKind::LongLink,
            ..
        })
    ));

    let member = header_frame(&frames, 5);
    assert_eq!(member.kind, UstarKind::SymbolicLink);
}
2089
/// Each block from `invalid_header_cases`, framed on its own, must end in the error
/// paired with it.
#[test]
fn rejects_header_format_type_and_field_errors() {
    for (case, block, expected) in invalid_header_cases() {
        let archive = block.to_vec();
        let frames = collect(archive, BLOCK_SIZE);
        let rejection = last_error_inner(&frames);
        assert!(expected.matches(rejection), "{case}: {rejection:?}");
    }
}
2098
/// Filling any of the GNU header's mode, uid, gid or mtime fields with `b'x'` (and
/// recomputing the checksum) must be reported as `InvalidNumericField` for that field.
#[test]
fn rejects_malformed_gnu_numeric_fields() {
    for (field, range) in [
        ("mode", MODE_RANGE),
        ("uid", UID_RANGE),
        ("gid", GID_RANGE),
        ("mtime", MTIME_RANGE),
    ] {
        let mut block = gnu_header(b'0', 0);
        block[range].fill(b'x');
        set_checksum(&mut block);

        let frames = collect(block.to_vec(), BLOCK_SIZE);
        let error = last_error_inner(&frames);
        // Name the field and the actual error so a failing iteration is identifiable.
        assert!(
            matches!(
                error,
                FrameErrorInner::InvalidNumericField { field: found, .. } if *found == field
            ),
            "{field}: {error:?}"
        );
    }
}
2117
/// Headers of the listed kinds that declare a size of 1 are rejected with
/// `InvalidMemberSize`. The table has no pax hard-link (`b'1'`) entry.
#[test]
fn rejects_nonzero_physical_sizes_for_payload_free_members() {
    let cases = [
        (ArchiveFormat::Pax, b'2', UstarKind::SymbolicLink),
        (ArchiveFormat::Gnu, b'1', UstarKind::HardLink),
        (ArchiveFormat::Gnu, b'2', UstarKind::SymbolicLink),
        (ArchiveFormat::Pax, b'3', UstarKind::CharacterDevice),
        (ArchiveFormat::Gnu, b'3', UstarKind::CharacterDevice),
        (ArchiveFormat::Pax, b'4', UstarKind::BlockDevice),
        (ArchiveFormat::Gnu, b'4', UstarKind::BlockDevice),
        (ArchiveFormat::Pax, b'5', UstarKind::Directory),
        (ArchiveFormat::Gnu, b'5', UstarKind::Directory),
        (ArchiveFormat::Pax, b'6', UstarKind::Fifo),
        (ArchiveFormat::Gnu, b'6', UstarKind::Fifo),
    ];

    for (format, typeflag, kind) in cases {
        // GNU rows are built with `gnu_header`, every other row with `header`.
        let block = if matches!(format, ArchiveFormat::Gnu) {
            gnu_header(typeflag, 1)
        } else {
            header(typeflag, 1)
        };

        let frames = collect(block.to_vec(), BLOCK_SIZE);
        let rejected = matches!(
            last_error_inner(&frames),
            FrameErrorInner::InvalidMemberSize {
                kind: found,
                size: 1,
            } if *found == kind
        );
        assert!(rejected, "{format:?} {kind:?}");
    }
}
2166
/// For the listed kinds, a size of 1 is rejected with `InvalidMemberSize` whether it comes
/// from the local pax `size` record ("effective") or from the header field ("declared").
#[test]
fn rejects_nonzero_declared_or_effective_pax_sizes_for_payload_free_members() {
    let payload_free_kinds = [
        (b'2', UstarKind::SymbolicLink),
        (b'3', UstarKind::CharacterDevice),
        (b'4', UstarKind::BlockDevice),
        (b'5', UstarKind::Directory),
        (b'6', UstarKind::Fifo),
    ];

    for (case, declared_size, override_size) in [("effective", 0, "1"), ("declared", 1, "0")] {
        for (typeflag, kind) in &payload_free_kinds {
            let mut archive = Vec::new();
            append_pax(&mut archive, b'x', &record("size", override_size));
            append_block(&mut archive, &header(*typeflag, declared_size));

            let frames = collect(archive, BLOCK_SIZE);
            let rejected = matches!(
                last_error_inner(&frames),
                FrameErrorInner::InvalidMemberSize {
                    kind: found,
                    size: 1,
                } if found == kind
            );
            assert!(rejected, "{case} {kind:?}");
        }
    }
}
2194
/// When a block from `invalid_header_cases` follows one valid header, the resulting
/// error must carry position `BLOCK_SIZE` and still match the expected error.
#[test]
fn header_errors_preserve_later_header_positions() {
    let second_block_position = BLOCK_SIZE as u64;

    for (case, malformed, expected) in invalid_header_cases() {
        let mut archive = Vec::new();
        append_block(&mut archive, &header(b'0', 0));
        append_block(&mut archive, &malformed);

        let frames = collect(archive, BLOCK_SIZE);
        let failure = last_error(&frames);
        assert_eq!(failure.position, second_block_position, "{case}");
        assert!(expected.matches(&failure.inner), "{case}: {failure:?}");
    }
}
2209
/// Covers three malformed pax sequences: a zero-length local extension, a second local
/// extension header directly after a complete one, and a local extension with nothing
/// after it.
#[test]
fn rejects_invalid_pax_sequences() {
    let empty_extension = collect(header(b'x', 0).to_vec(), BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&empty_extension),
        FrameErrorInner::InvalidPaxRecord {
            source: PaxError::InvalidRecords { .. },
        }
    ));

    let valid = record("path", "name");

    let mut consecutive = Vec::new();
    append_pax(&mut consecutive, b'x', &valid);
    append_block(&mut consecutive, &header(b'x', valid.len() as u64));
    let consecutive_frames = collect(consecutive, BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&consecutive_frames),
        FrameErrorInner::UnexpectedOrder { .. }
    ));

    let mut missing_member = Vec::new();
    append_pax(&mut missing_member, b'x', &valid);
    let missing_member_frames = collect(missing_member, BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&missing_member_frames),
        FrameErrorInner::UnexpectedEof { .. }
    ));
}
2235
/// A local extension with a non-numeric `size` record, placed after one valid header,
/// must fail with `PaxError::InvalidInteger` at position `BLOCK_SIZE`.
#[test]
fn preserves_pax_parse_error_positions_in_stream() {
    let mut archive = Vec::new();
    append_block(&mut archive, &header(b'0', 0));
    append_pax(&mut archive, b'x', &record("size", "bad"));

    let frames = collect(archive, BLOCK_SIZE);
    let reported_at_second_block = match frames.last() {
        Some(Err(FrameError {
            position,
            inner:
                FrameErrorInner::InvalidPaxRecord {
                    source: PaxError::InvalidInteger { .. },
                },
        })) => *position == BLOCK_SIZE as u64,
        _ => false,
    };
    assert!(reported_at_second_block);
}
2254
/// With a global `hdrcharset=BINARY` record, both the global and the following local
/// `path` values are exposed as `PaxString::Binary`; a local `hdrcharset` of
/// `ISO-IR 8859 1 1998` is rejected with `UnsupportedCharset` carrying that value.
#[test]
fn accepts_binary_and_rejects_unknown_pax_charsets() {
    let global = [record("hdrcharset", "BINARY"), record("path", "global")].concat();
    let local = record("path", "local");

    let mut archive = Vec::new();
    append_pax(&mut archive, b'g', &global);
    append_pax(&mut archive, b'x', &local);
    append_block(&mut archive, &header(b'0', 0));
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert_eq!(header_frame(&frames, 4).kind, UstarKind::Regular);

    let expected_global = [
        PaxRecord::HdrCharset(PaxValue::Value(HdrCharset::Binary)),
        PaxRecord::Path(PaxValue::Value(PaxString::Binary(
            b"global".to_vec().into(),
        ))),
    ];
    assert_eq!(
        data_frame(&frames, 1).completed_pax_records(),
        Some(expected_global.as_slice())
    );

    let expected_local = [PaxRecord::Path(PaxValue::Value(PaxString::Binary(
        b"local".to_vec().into(),
    )))];
    assert_eq!(
        data_frame(&frames, 3).completed_pax_records(),
        Some(expected_local.as_slice())
    );

    let mut unsupported = Vec::new();
    append_pax(
        &mut unsupported,
        b'x',
        &record("hdrcharset", "ISO-IR 8859 1 1998"),
    );
    let unsupported_frames = collect(unsupported, BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&unsupported_frames),
        FrameErrorInner::InvalidPaxRecord {
            source: PaxError::UnsupportedCharset { value },
        } if value == "ISO-IR 8859 1 1998"
    ));
}
2300
/// Checks three GNU ordering violations (`UnexpectedOrder`), the `b'S'` typeflag
/// (`UnsupportedTypeflag`), and a size field filled with `0xff` (`InvalidSize`).
#[test]
fn rejects_invalid_gnu_sequences_and_sizes() {
    // Concatenates two blocks into one archive, with no terminator.
    let two_blocks = |first: &Block, second: &Block| {
        let mut bytes = Vec::new();
        append_block(&mut bytes, first);
        append_block(&mut bytes, second);
        bytes
    };

    let duplicate = two_blocks(&gnu_header(b'L', 0), &gnu_header(b'L', 0));
    let long_link_for_regular = two_blocks(&gnu_header(b'K', 0), &gnu_header(b'0', 0));
    let mut dangling = Vec::new();
    append_block(&mut dangling, &gnu_header(b'L', 0));
    append_terminator(&mut dangling);

    let ordering_cases = [
        ("duplicate", duplicate),
        ("long-link-for-regular", long_link_for_regular),
        ("dangling", dangling),
    ];
    for (case, archive) in ordering_cases {
        let frames = collect(archive, BLOCK_SIZE);
        let out_of_order = matches!(
            last_error_inner(&frames),
            FrameErrorInner::UnexpectedOrder { .. }
        );
        assert!(out_of_order, "{case}");
    }

    let sparse_frames = collect(gnu_header(b'S', 0).to_vec(), BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&sparse_frames),
        FrameErrorInner::UnsupportedTypeflag { typeflag: b'S' }
    ));

    let mut negative_size = gnu_header(b'0', 0);
    negative_size[SIZE_RANGE].fill(0xff);
    set_checksum(&mut negative_size);
    let negative_frames = collect(negative_size.to_vec(), BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&negative_frames),
        FrameErrorInner::InvalidSize { .. }
    ));
}
2339
/// Once the first header has been accepted, a header of the other family is a
/// `FormatMismatch`; pax typeflags under a GNU identity are unsupported; and an archive
/// holding only a terminator ends with no frame and no detected format.
#[test]
fn detects_one_archive_family_and_rejects_mixing() {
    // Concatenates two blocks into one archive, with no terminator.
    let two_blocks = |first: &Block, second: &Block| {
        let mut bytes = Vec::new();
        append_block(&mut bytes, first);
        append_block(&mut bytes, second);
        bytes
    };

    let posix_then_gnu = collect(
        two_blocks(&header(b'0', 0), &gnu_header(b'0', 0)),
        BLOCK_SIZE,
    );
    assert!(matches!(
        last_error_inner(&posix_then_gnu),
        FrameErrorInner::FormatMismatch {
            expected: ArchiveFormat::Pax,
            found: ArchiveFormat::Gnu,
        }
    ));

    // A family mismatch applies only to a successfully decoded header.
    let mut malformed_gnu = gnu_header(b'0', 0);
    malformed_gnu[0] = b'X';
    let posix_then_malformed_gnu = collect(
        two_blocks(&header(b'0', 0), &malformed_gnu),
        BLOCK_SIZE,
    );
    assert!(matches!(
        last_error_inner(&posix_then_malformed_gnu),
        FrameErrorInner::InvalidChecksum { .. }
    ));

    let gnu_then_posix = collect(
        two_blocks(&gnu_header(b'0', 0), &header(b'0', 0)),
        BLOCK_SIZE,
    );
    assert!(matches!(
        last_error_inner(&gnu_then_posix),
        FrameErrorInner::FormatMismatch {
            expected: ArchiveFormat::Gnu,
            found: ArchiveFormat::Pax,
        }
    ));

    for typeflag in [b'x', b'g'] {
        let frames = collect(gnu_header(typeflag, 0).to_vec(), BLOCK_SIZE);
        let unsupported = matches!(
            last_error_inner(&frames),
            FrameErrorInner::UnsupportedTypeflag { typeflag: found } if *found == typeflag
        );
        assert!(unsupported, "{typeflag:?}");
    }

    let mut terminator_only = Vec::new();
    append_terminator(&mut terminator_only);
    let mut stream = TarStream::new(ChunkedReader::new(terminator_only, BLOCK_SIZE));
    assert!(matches!(ready(stream.next_frame()), Ok(None)));
    assert_eq!(stream.format(), None);
}
2391
2392    #[test]
2393    fn rejects_truncation_and_invalid_termination() {
2394        assert!(matches!(
2395            last_error_inner(&collect(vec![0; 3], 1)),
2396            FrameErrorInner::IncompleteBlock { read: 3 }
2397        ));
2398
2399        let mut payload_truncated = Vec::new();
2400        append_block(&mut payload_truncated, &header(b'0', 1));
2401        assert!(matches!(
2402            last_error_inner(&collect(payload_truncated, BLOCK_SIZE)),
2403            FrameErrorInner::TruncatedPayload {
2404                owner: DataOwner::Member,
2405                ..
2406            }
2407        ));
2408
2409        let mut pax_payload_truncated = Vec::new();
2410        append_block(&mut pax_payload_truncated, &header(b'x', 513));
2411        append_payload(&mut pax_payload_truncated, b"11 path=x\n");
2412        assert!(matches!(
2413            last_error_inner(&collect(pax_payload_truncated, BLOCK_SIZE)),
2414            FrameErrorInner::TruncatedPayload {
2415                owner: DataOwner::Pax(PaxKind::Local),
2416                ..
2417            }
2418        ));
2419
2420        let mut missing_second_zero = Vec::new();
2421        append_block(&mut missing_second_zero, &header(b'0', 0));
2422        append_block(&mut missing_second_zero, &[0; BLOCK_SIZE]);
2423        assert!(matches!(
2424            last_error_inner(&collect(missing_second_zero, BLOCK_SIZE)),
2425            FrameErrorInner::MissingEndMarker
2426        ));
2427
2428        let mut bad_second_zero = Vec::new();
2429        append_block(&mut bad_second_zero, &header(b'0', 0));
2430        append_block(&mut bad_second_zero, &[0; BLOCK_SIZE]);
2431        append_block(&mut bad_second_zero, &header(b'0', 0));
2432        assert!(matches!(
2433            last_error_inner(&collect(bad_second_zero, BLOCK_SIZE)),
2434            FrameErrorInner::InvalidEndMarker
2435        ));
2436    }
2437
2438    #[test]
2439    fn stream_is_fused_after_first_error() {
2440        let mut stream = TarStream::new(ChunkedReader::new(header(b'L', 0).to_vec(), BLOCK_SIZE));
2441        assert!(matches!(
2442            ready(stream.next_frame()),
2443            Err(FrameError {
2444                position: 0,
2445                inner: FrameErrorInner::UnsupportedTypeflag { typeflag: b'L' },
2446            })
2447        ));
2448        assert!(matches!(ready(stream.next_frame()), Ok(None)));
2449    }
2450}