Skip to main content

tar_framing/
logical.rs

1//! Member-oriented reading above the lossless physical frame stream.
2//!
3//! This API assembles PAX and GNU extension payloads with the ordinary members
4//! they describe. Each member carries a compact borrowed [`Header`], and each
5//! PAX member carries one unified [`PaxState`].
6
7use std::{borrow::Cow, mem, ops::Range};
8
9use tokio::io::AsyncRead;
10
11use crate::{
12    ArchiveFormat, Block, FrameError, FrameErrorInner, GnuKind, PaxKeyword, PaxKind, PaxRecord,
13    PaxString, PaxValue, UstarKind,
14    header::{GNAME_RANGE, LINK_NAME_RANGE, UNAME_RANGE},
15    pax::GlobalPaxRecords,
16    stream::{DataFrame, DataOwner, Frame, HeaderFrame, TarStream},
17};
18
19pub use crate::{PaxExtension, PaxState};
20
/// Target chunk length (1 MiB) requested for each read while draining unread
/// member payload.
const PAYLOAD_DRAIN_CHUNK_BYTES: usize = 1024 * 1024;
22
/// A GNU long-name or long-link value needed to interpret a member.
///
/// The payload is stored as it was read from the extension's data blocks. It
/// is validated (a NUL terminator is present and only NUL bytes follow it)
/// when the member's effective path or link path is resolved.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct GnuMetadata {
    /// The absolute byte position of the GNU extension header block.
    pub position: u64,
    /// The meaningful metadata payload bytes, excluding tar padding.
    ///
    /// A terminating NUL, when present, is still part of these bytes.
    pub payload: Vec<u8>,
}
31
/// Extension metadata attached to one ordinary archive member.
///
/// The variant follows the archive format of the ordinary member header:
/// pax-format members carry [`MemberExtensions::Pax`] and GNU-format members
/// carry [`MemberExtensions::Gnu`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MemberExtensions<'a> {
    /// Unified pax metadata applicable to an ordinary ustar member, borrowing
    /// effective global values from the logical reader.
    Pax(PaxState<'a>),
    /// GNU metadata applying to an ordinary GNU member.
    Gnu {
        /// Optional GNU long-name metadata.
        long_name: Option<GnuMetadata>,
        /// Optional GNU long-link metadata.
        long_link: Option<GnuMetadata>,
    },
}
46
/// Extracted ordinary-header metadata for one logical archive member.
///
/// Unlike [`HeaderFrame`], this type does not retain the lossless physical
/// header block. Its ordinary path and link-path fallbacks borrow reusable
/// storage owned by [`TarReader`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Header<'a> {
    /// The absolute byte position of the ordinary member header block.
    pub position: u64,
    /// The selected archive family of this member header.
    pub format: ArchiveFormat,
    /// The member type identified by the header.
    pub kind: UstarKind,
    /// The size encoded directly in the member header field.
    pub declared_size: u64,
    /// The size after applying applicable pax `size` records.
    ///
    /// This is also the number of payload bytes exposed through
    /// [`MemberPayload`]. Member kinds that cannot carry payload are rejected
    /// when either their declared or effective size is nonzero.
    pub effective_size: u64,
    /// Permission and mode bits decoded from the ordinary header, if present.
    ///
    /// Note that pax only defines the semantics of the lower 12 bits of this
    /// field. Higher bits may or may not be set, and have no assigned semantics.
    ///
    /// This is [`None`] only when the field is wholly NUL and the framing policy
    /// permits missing numeric metadata.
    pub mode: Option<u64>,
    /// Numeric user identifier from the ordinary header, if present.
    ///
    /// This is [`None`] only when the field is wholly NUL and the framing policy
    /// permits missing numeric metadata.
    ///
    /// Applicable pax metadata may override or delete this fallback.
    pub uid: Option<u64>,
    /// Numeric group identifier from the ordinary header, if present.
    ///
    /// This is [`None`] only when the field is wholly NUL and the framing policy
    /// permits missing numeric metadata.
    ///
    /// Applicable pax metadata may override or delete this fallback.
    pub gid: Option<u64>,
    /// Modification time in seconds from the ordinary header, if present.
    ///
    /// This is [`None`] only when the field is wholly NUL and the framing policy
    /// permits missing numeric metadata.
    ///
    /// Applicable pax metadata may override or delete this fallback.
    pub mtime: Option<u64>,
    /// User name bytes from the ordinary header, empty if absent or unusable.
    ///
    /// Applicable pax metadata may override or delete this fallback.
    pub uname: &'a [u8],
    /// Group name bytes from the ordinary header, empty if absent or unusable.
    ///
    /// Applicable pax metadata may override or delete this fallback.
    pub gname: &'a [u8],
    /// Path taken from the ordinary header; the fallback used by
    /// [`MemberFrame::effective_path`] when no extension supplies a path.
    header_path: &'a [u8],
    /// Link-name field bytes up to the first NUL; the fallback used by
    /// [`MemberFrame::effective_link_path`] when no extension supplies one.
    link_name: &'a [u8],
}
108
/// One meaningful payload block belonging to an ordinary archive member.
///
/// Produced by [`MemberPayload::next_block`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PayloadBlock {
    /// The absolute byte position of this payload block.
    pub position: u64,
    /// The lossless payload block bytes, including any final padding.
    pub block: Block,
    /// The number of meaningful bytes in this block.
    ///
    /// This many bytes are subtracted from the member's remaining payload
    /// count when the block is read.
    pub len: usize,
}
119
/// An ordinary archive member and its streaming payload cursor.
///
/// Returned by [`TarReader::next_frame`]; every part of it borrows from that
/// reader.
pub struct MemberFrame<'a, R> {
    /// The ordinary member header.
    pub header: Header<'a>,
    /// Extension metadata applying to this member.
    pub extensions: MemberExtensions<'a>,
    /// A cursor over the member payload bytes.
    pub payload: MemberPayload<'a, R>,
}
129
130impl<R> MemberFrame<'_, R> {
131    /// Returns the effective member path after applying pax or GNU metadata.
132    ///
133    /// An explicit pax deletion is an error because it also removes the
134    /// ordinary-header fallback required to identify this member. Empty paths
135    /// and paths containing embedded NUL bytes are also rejected.
136    pub fn effective_path(&self) -> Result<Cow<'_, [u8]>, FrameError> {
137        let path = effective_member_path(&self.header, &self.extensions)?;
138        if path.is_empty() {
139            return Err(FrameError::at(
140                self.header.position,
141                FrameErrorInner::EmptyMemberPath,
142            ));
143        }
144        reject_nul(self.header.position, "path", path.as_ref())?;
145        Ok(path)
146    }
147
148    /// Returns the effective member link target after applying pax or GNU metadata.
149    ///
150    /// An explicit pax deletion is an error because it also removes the
151    /// ordinary-header fallback required to identify a link target. Link
152    /// targets containing embedded NUL bytes are also rejected.
153    pub fn effective_link_path(&self) -> Result<Cow<'_, [u8]>, FrameError> {
154        let path = match &self.extensions {
155            MemberExtensions::Pax(state) => resolve_pax_text(
156                self.header.position,
157                state,
158                &PaxKeyword::LinkPath,
159                "linkpath",
160                Cow::Borrowed(self.header.link_name),
161                |record| match record {
162                    PaxRecord::LinkPath(value) => Some(value),
163                    _ => None,
164                },
165            ),
166            MemberExtensions::Gnu { long_link, .. } => match long_link {
167                Some(metadata) => Ok(Cow::Borrowed(parse_gnu_metadata(
168                    metadata,
169                    GnuKind::LongLink,
170                )?)),
171                None => Ok(Cow::Borrowed(self.header.link_name)),
172            },
173        }?;
174        reject_nul(self.header.position, "link path", path.as_ref())?;
175        Ok(path)
176    }
177}
178
/// A streaming, typed cursor over one member's payload blocks.
///
/// Holds the exclusive borrow of the reader's payload state for as long as the
/// member frame is alive.
pub struct MemberPayload<'a, R> {
    /// The payload state of the [`TarReader`] that produced this member.
    reader: &'a mut PayloadReader<R>,
}
183
/// A logical reader that assembles physical frames into archive-level items.
///
/// Unlike [`TarStream`], this API attaches PAX or GNU extension metadata to the
/// ordinary member it describes. Each PAX member carries one [`PaxState`] with
/// effective metadata and newly encountered positioned extensions. Ordinary
/// header path and link-path fallbacks are copied into reusable storage and
/// borrowed by the returned [`Header`].
pub struct TarReader<R> {
    // Keep the logical effective state outside `payload` so a returned
    // `PaxState` can borrow it while `MemberPayload` mutably borrows only the
    // independent payload machinery. `TarStream` maintains its own physical
    // copy for framing decisions.
    /// Effective global pax records, updated by each completed global
    /// extension.
    global_pax_records: Option<GlobalPaxRecords>,
    /// The physical stream together with the current member's payload cursor.
    payload: PayloadReader<R>,
    /// Reusable buffers backing the byte slices of the returned [`Header`].
    header_storage: HeaderStorage,
    /// Completed extensions waiting for the next ordinary member header.
    pending_extensions: PendingExtensions,
    /// The extension whose payload data frames are still being received.
    extension_payload: Option<ExtensionPayload>,
}
202
/// Payload state kept separate so [`MemberPayload`] can borrow it mutably while
/// the logical [`Header`] borrows reusable header storage.
struct PayloadReader<R> {
    /// The underlying physical frame stream.
    stream: TarStream<R>,
    /// Payload bytes of the current member that have not been read yet; set
    /// from the header's effective size when a member is returned.
    remaining: u64,
    /// Scratch buffer reused whenever unread payload is drained.
    drain_buffer: Vec<u8>,
}
210
/// Logical member metadata retained across cancellation of [`TarReader::next_frame`].
///
/// Holds the extensions completed since the last ordinary member; they are
/// handed to the next ordinary header and reset to the default afterwards.
#[derive(Default)]
struct PendingExtensions {
    /// Completed global pax extensions, in the order they were completed.
    global_pax: Vec<PaxExtension>,
    /// The most recently completed local pax extension, if any.
    local_pax: Option<PaxExtension>,
    /// The most recently completed GNU long-name value, if any.
    gnu_long_name: Option<GnuMetadata>,
    /// The most recently completed GNU long-link value, if any.
    gnu_long_link: Option<GnuMetadata>,
}
219
220impl PendingExtensions {
221    fn set_gnu(&mut self, kind: GnuKind, metadata: GnuMetadata) {
222        *match kind {
223            GnuKind::LongName => &mut self.gnu_long_name,
224            GnuKind::LongLink => &mut self.gnu_long_link,
225        } = Some(metadata);
226    }
227}
228
/// An extension payload being assembled across physical frames.
enum ExtensionPayload {
    /// A pax extension whose records are still incomplete. Only the header
    /// position and kind are tracked here; the completed records are obtained
    /// from the data frame itself.
    Pax {
        /// The absolute byte position of the pax extension header block.
        position: u64,
        /// Whether the extension is global or local.
        kind: PaxKind,
    },
    /// A GNU long-name or long-link extension whose bytes are still arriving.
    Gnu {
        /// The absolute byte position of the GNU extension header block.
        position: u64,
        /// Whether this is long-name or long-link metadata.
        kind: GnuKind,
        /// Payload bytes still expected before the value is complete.
        remaining: u64,
        /// Meaningful payload bytes collected so far.
        payload: Vec<u8>,
    },
}
242
/// Reusable byte buffers that back the borrowed slices of a [`Header`].
///
/// Every buffer is overwritten each time a new ordinary header is processed.
#[derive(Default)]
struct HeaderStorage {
    /// The path copied out of the ordinary header.
    path: Vec<u8>,
    /// The link-name field, truncated at its first NUL.
    link_name: Vec<u8>,
    /// The user-name field, truncated at its first NUL.
    uname: Vec<u8>,
    /// The group-name field, truncated at its first NUL.
    gname: Vec<u8>,
}
250
251impl HeaderStorage {
252    fn update<'a>(&'a mut self, frame: &HeaderFrame) -> Header<'a> {
253        frame.copy_header_path_into(&mut self.path);
254        copy_string_field_into(&frame.block, LINK_NAME_RANGE, &mut self.link_name);
255        copy_string_field_into(&frame.block, UNAME_RANGE, &mut self.uname);
256        copy_string_field_into(&frame.block, GNAME_RANGE, &mut self.gname);
257        Header {
258            position: frame.position,
259            format: frame.format,
260            kind: frame.kind,
261            declared_size: frame.declared_size,
262            effective_size: frame.effective_size,
263            mode: frame.mode,
264            uid: frame.uid,
265            gid: frame.gid,
266            mtime: frame.mtime,
267            uname: &self.uname,
268            gname: &self.gname,
269            header_path: &self.path,
270            link_name: &self.link_name,
271        }
272    }
273}
274
275fn copy_string_field_into(block: &Block, range: Range<usize>, destination: &mut Vec<u8>) {
276    let field = &block[range];
277    let len = field
278        .iter()
279        .position(|byte| *byte == 0)
280        .unwrap_or(field.len());
281    destination.clear();
282    destination.extend_from_slice(&field[..len]);
283}
284
285impl<R> TarReader<R> {
286    /// Creates a new logical reader from an uncompressed tar reader.
287    pub fn new(reader: R) -> Self {
288        Self {
289            global_pax_records: None,
290            payload: PayloadReader {
291                stream: TarStream::new(reader),
292                remaining: 0,
293                drain_buffer: Vec::new(),
294            },
295            header_storage: HeaderStorage::default(),
296            pending_extensions: PendingExtensions::default(),
297            extension_payload: None,
298        }
299    }
300
301    /// Sets the maximum size accepted for each subsequent pax extension.
302    ///
303    /// A local or global pax header that declares a larger payload is rejected
304    /// before any of its payload blocks are consumed. Setting the maximum to
305    /// [`u64::MAX`] removes the per-extension bound; global extensions remain
306    /// subject to their cumulative limit.
307    ///
308    /// See [`TarStream::set_max_pax_extension_size`].
309    pub fn set_max_pax_extension_size(&mut self, max_pax_extension_size: u64) {
310        self.payload
311            .stream
312            .set_max_pax_extension_size(max_pax_extension_size);
313    }
314
315    /// Sets the maximum cumulative size of global pax extensions before one member.
316    ///
317    /// A global header that would increase the pending total beyond this limit
318    /// is rejected before its payload is consumed. Setting the maximum to
319    /// [`u64::MAX`] removes the cumulative bound; each extension remains
320    /// subject to its individual limit.
321    ///
322    /// See [`TarStream::set_max_global_pax_extensions_size`].
323    pub fn set_max_global_pax_extensions_size(&mut self, max_global_pax_extensions_size: u64) {
324        self.payload
325            .stream
326            .set_max_global_pax_extensions_size(max_global_pax_extensions_size);
327    }
328
329    /// Sets whether wholly NUL numeric metadata fields may be accepted.
330    ///
331    /// See [`TarStream::set_allow_all_nul_numeric_fields`].
332    pub fn set_allow_all_nul_numeric_fields(&mut self, allow: bool) {
333        self.payload.stream.set_allow_all_nul_numeric_fields(allow);
334    }
335
336    /// Sets the maximum size accepted for each subsequent GNU metadata extension.
337    ///
338    /// A long-name or long-link header that declares a larger payload is
339    /// rejected before any of its payload blocks are consumed. Setting the
340    /// maximum to [`u64::MAX`] permits unbounded metadata buffering.
341    pub fn set_max_gnu_extension_size(&mut self, max_gnu_extension_size: u64) {
342        self.payload
343            .stream
344            .set_max_gnu_extension_size(max_gnu_extension_size);
345    }
346}
347
348impl<R: AsyncRead + Unpin> TarReader<R> {
349    /// Returns the next ordinary archive member.
350    ///
351    /// If the preceding member payload was not fully consumed, it is first
352    /// drained and validated. Extension metadata is then consumed and attached
353    /// before the next member is returned. Global pax updates not followed by
354    /// an ordinary member are consumed and ignored. A returned pax state is a
355    /// view borrowing this reader; it must be dropped before requesting another
356    /// member.
357    pub async fn next_frame(&mut self) -> Result<Option<MemberFrame<'_, R>>, FrameError> {
358        if let Err(error) = self.payload.drain_payload().await {
359            self.clear_extension_state();
360            return Err(error);
361        }
362
363        loop {
364            let frame = match self.payload.stream.next_frame().await {
365                Ok(Some(frame)) => frame,
366                Err(error) => {
367                    self.clear_extension_state();
368                    return Err(error);
369                }
370                Ok(None) => {
371                    self.clear_extension_state();
372                    return Ok(None);
373                }
374            };
375            match frame {
376                Frame::Pax(frame) => {
377                    self.extension_payload = Some(ExtensionPayload::Pax {
378                        position: frame.position,
379                        kind: frame.kind,
380                    });
381                }
382                Frame::Gnu(frame) => {
383                    if frame.payload_size == 0 {
384                        let metadata = GnuMetadata {
385                            position: frame.position,
386                            payload: Vec::new(),
387                        };
388                        self.pending_extensions.set_gnu(frame.kind, metadata);
389                    } else {
390                        self.extension_payload = Some(ExtensionPayload::Gnu {
391                            position: frame.position,
392                            kind: frame.kind,
393                            remaining: frame.payload_size,
394                            payload: Vec::new(),
395                        });
396                    }
397                }
398                Frame::Header(header) => {
399                    let pending_extensions = mem::take(&mut self.pending_extensions);
400                    let extensions = match header.format {
401                        ArchiveFormat::Pax => MemberExtensions::Pax(PaxState::new(
402                            self.global_pax_records.as_ref(),
403                            pending_extensions.global_pax,
404                            pending_extensions.local_pax,
405                        )),
406                        ArchiveFormat::Gnu => MemberExtensions::Gnu {
407                            long_name: pending_extensions.gnu_long_name,
408                            long_link: pending_extensions.gnu_long_link,
409                        },
410                    };
411                    self.payload.remaining = header.effective_size;
412                    let header = self.header_storage.update(&header);
413                    return Ok(Some(MemberFrame {
414                        header,
415                        extensions,
416                        payload: MemberPayload {
417                            reader: &mut self.payload,
418                        },
419                    }));
420                }
421                Frame::Data(frame) => {
422                    if let Err(error) = self.process_extension_data(frame) {
423                        self.clear_extension_state();
424                        return Err(error);
425                    }
426                }
427            }
428        }
429    }
430
431    fn clear_extension_state(&mut self) {
432        self.pending_extensions = PendingExtensions::default();
433        self.extension_payload = None;
434    }
435
436    fn process_extension_data(&mut self, frame: DataFrame) -> Result<(), FrameError> {
437        let Some(payload) = self.extension_payload.take() else {
438            return Err(FrameError::unexpected_order(
439                frame.position,
440                "extension header or ordinary member header",
441                "unattached payload data",
442            ));
443        };
444        match payload {
445            ExtensionPayload::Pax { position, kind } => {
446                if frame.owner != DataOwner::Pax(kind) {
447                    return Err(FrameError::unexpected_order(
448                        frame.position,
449                        "pax extension payload",
450                        "different payload data",
451                    ));
452                }
453                if let Some(records) = frame.into_completed_pax_records() {
454                    match kind {
455                        PaxKind::Global => {
456                            records.apply_global(&mut self.global_pax_records);
457                            self.pending_extensions
458                                .global_pax
459                                .push(PaxExtension::new(position, kind, records));
460                        }
461                        PaxKind::Local => {
462                            self.pending_extensions.local_pax =
463                                Some(PaxExtension::new(position, kind, records));
464                        }
465                    }
466                } else {
467                    self.extension_payload = Some(ExtensionPayload::Pax { position, kind });
468                }
469            }
470            ExtensionPayload::Gnu {
471                position,
472                kind,
473                mut remaining,
474                mut payload,
475            } => {
476                if frame.owner != DataOwner::Gnu(kind) {
477                    return Err(FrameError::unexpected_order(
478                        frame.position,
479                        "GNU metadata payload",
480                        "different payload data",
481                    ));
482                }
483                let len = u64::try_from(frame.len).map_err(|_| {
484                    FrameError::arithmetic_overflow(frame.position, "GNU metadata payload length")
485                })?;
486                remaining = remaining.checked_sub(len).ok_or_else(|| {
487                    FrameError::unexpected_order(
488                        frame.position,
489                        "bounded GNU metadata payload",
490                        "oversized GNU metadata payload",
491                    )
492                })?;
493                payload.extend_from_slice(&frame.block[..frame.len]);
494                if remaining == 0 {
495                    let metadata = GnuMetadata { position, payload };
496                    self.pending_extensions.set_gnu(kind, metadata);
497                } else {
498                    self.extension_payload = Some(ExtensionPayload::Gnu {
499                        position,
500                        kind,
501                        remaining,
502                        payload,
503                    });
504                }
505            }
506        }
507        Ok(())
508    }
509}
510
511impl<R: AsyncRead + Unpin> PayloadReader<R> {
512    async fn next_payload_block(&mut self) -> Result<Option<PayloadBlock>, FrameError> {
513        if self.remaining == 0 {
514            return Ok(None);
515        }
516        let (position, block, len) = self.stream.read_member_block().await?;
517        let payload_len = u64::try_from(len)
518            .map_err(|_| FrameError::arithmetic_overflow(position, "member payload length"))?;
519        self.remaining = self.remaining.checked_sub(payload_len).ok_or_else(|| {
520            FrameError::unexpected_order(
521                position,
522                "bounded member payload",
523                "oversized member payload",
524            )
525        })?;
526        Ok(Some(PayloadBlock {
527            position,
528            block,
529            len,
530        }))
531    }
532
533    async fn next_payload_chunk(
534        &mut self,
535        buffer: &mut Vec<u8>,
536        target_len: usize,
537    ) -> Result<bool, FrameError> {
538        if self.remaining == 0 {
539            return Ok(false);
540        }
541        let len = self.stream.read_member_chunk(buffer, target_len).await?;
542        let len = u64::try_from(len).map_err(|_| {
543            FrameError::arithmetic_overflow(self.stream.position, "member payload chunk length")
544        })?;
545        self.remaining = self.remaining.checked_sub(len).ok_or_else(|| {
546            FrameError::unexpected_order(
547                self.stream.position,
548                "bounded member payload",
549                "oversized member payload chunk",
550            )
551        })?;
552        Ok(true)
553    }
554
555    async fn drain_payload(&mut self) -> Result<(), FrameError> {
556        let mut buffer = mem::take(&mut self.drain_buffer);
557        let result = loop {
558            match self
559                .next_payload_chunk(&mut buffer, PAYLOAD_DRAIN_CHUNK_BYTES)
560                .await
561            {
562                Ok(true) => {}
563                Ok(false) => break Ok(()),
564                Err(error) => break Err(error),
565            }
566        };
567        self.drain_buffer = buffer;
568        result
569    }
570}
571
572impl<R: AsyncRead + Unpin> MemberPayload<'_, R> {
573    /// Returns the next meaningful payload block, excluding final padding in `len`.
574    pub async fn next_block(&mut self) -> Result<Option<PayloadBlock>, FrameError> {
575        self.reader.next_payload_block().await
576    }
577
578    /// Reads validated payload bytes into a reusable chunk buffer.
579    ///
580    /// When this returns `true`, the buffer's existing contents are replaced.
581    /// When the payload is exhausted, it returns `false` without changing the
582    /// buffer so its initialized storage can be reused. Complete physical blocks
583    /// are read directly into it until the chunk contains at least `target_len`
584    /// bytes or the payload ends. The target is raised to one physical block
585    /// when it is smaller, and final-block padding is removed before this
586    /// returns. This preserves [`Self::next_block`] as the lossless interface
587    /// while allowing higher-level consumers to amortize per-block bookkeeping
588    /// and copies.
589    pub async fn next_chunk(
590        &mut self,
591        buffer: &mut Vec<u8>,
592        target_len: usize,
593    ) -> Result<bool, FrameError> {
594        self.reader.next_payload_chunk(buffer, target_len).await
595    }
596
597    /// Discards and validates all remaining payload bytes using reusable storage.
598    pub async fn skip(self) -> Result<(), FrameError> {
599        self.reader.drain_payload().await
600    }
601}
602
603fn effective_member_path<'a>(
604    header: &Header<'a>,
605    extensions: &'a MemberExtensions<'_>,
606) -> Result<Cow<'a, [u8]>, FrameError> {
607    match extensions {
608        MemberExtensions::Pax(state) => resolve_pax_text(
609            header.position,
610            state,
611            &PaxKeyword::Path,
612            "path",
613            Cow::Borrowed(header.header_path),
614            |record| match record {
615                PaxRecord::Path(value) => Some(value),
616                _ => None,
617            },
618        ),
619        MemberExtensions::Gnu { long_name, .. } => match long_name {
620            Some(metadata) => Ok(Cow::Borrowed(parse_gnu_metadata(
621                metadata,
622                GnuKind::LongName,
623            )?)),
624            None => Ok(Cow::Borrowed(header.header_path)),
625        },
626    }
627}
628
629fn reject_nul(position: u64, field: &'static str, value: &[u8]) -> Result<(), FrameError> {
630    if value.contains(&0) {
631        return Err(FrameError::at(
632            position,
633            FrameErrorInner::NulInMemberName { field },
634        ));
635    }
636    Ok(())
637}
638
639fn resolve_pax_text<'a>(
640    position: u64,
641    state: &'a PaxState<'_>,
642    keyword: &PaxKeyword,
643    field: &'static str,
644    header_value: Cow<'a, [u8]>,
645    select: fn(&PaxRecord) -> Option<&PaxValue<PaxString>>,
646) -> Result<Cow<'a, [u8]>, FrameError> {
647    if let Some(value) = state.effective_record(keyword).and_then(select) {
648        return pax_value(position, field, value);
649    }
650    Ok(header_value)
651}
652
653/// Return the raw bytes of a pax record, erroring if the record is a tombstone
654/// (i.e.) explicitly deleted.
655fn pax_value<'a>(
656    position: u64,
657    keyword: &'static str,
658    value: &'a PaxValue<PaxString>,
659) -> Result<Cow<'a, [u8]>, FrameError> {
660    match value {
661        PaxValue::Value(PaxString::Utf8(value)) => Ok(Cow::Borrowed(value.as_bytes())),
662        PaxValue::Value(PaxString::Binary(value)) => Ok(Cow::Borrowed(value.as_ref())),
663        // A pax value that has been explicitly deleted does *not*
664        // result in a fallthrough to the corresponding ustar header value:
665        //
666        // "If a keyword in an extended header record (or in a -o option-
667        // argument) overrides or deletes a corresponding field in the ustar
668        // header block, pax shall ignore the contents of that header block
669        // field."
670        //
671        // See: pax spec, "pax Extended Header"
672        PaxValue::Deleted => Err(FrameError::deleted_pax_metadata(position, keyword)),
673    }
674}
675
676fn parse_gnu_metadata(metadata: &GnuMetadata, kind: GnuKind) -> Result<&[u8], FrameError> {
677    let terminator = metadata
678        .payload
679        .iter()
680        .position(|byte| *byte == 0)
681        .ok_or_else(|| {
682            FrameError::invalid_gnu_metadata(metadata.position, kind, "value is not NUL-terminated")
683        })?;
684
685    // TODO: Make this configurable through some kind of policy?
686    // Might be overly strict in practice.
687    if metadata.payload[terminator..].iter().any(|byte| *byte != 0) {
688        return Err(FrameError::invalid_gnu_metadata(
689            metadata.position,
690            kind,
691            "non-NUL bytes follow the terminator",
692        ));
693    }
694    Ok(&metadata.payload[..terminator])
695}
696
697#[cfg(test)]
698mod tests {
699    use tokio::io::AsyncRead;
700
701    use super::*;
702    use crate::{
703        BLOCK_SIZE, DEFAULT_MAX_GNU_EXTENSION_SIZE, FrameError, FrameErrorInner, PaxRecord,
704        PaxValue,
705        header::{
706            GID_RANGE, GNAME_RANGE, LINK_NAME_RANGE, MODE_RANGE, MTIME_RANGE, NAME_RANGE,
707            PREFIX_RANGE, TYPEFLAG_OFFSET, UID_RANGE, UNAME_RANGE,
708        },
709        stream::DataOwner,
710        test_support::{
711            ChunkedReader, append_block, append_gnu, append_pax, append_payload, append_terminator,
712            cancel_pending, gnu_header, header, ready, ready_ok, record, set_checksum,
713        },
714    };
715
716    fn set_field(block: &mut Block, range: std::ops::Range<usize>, value: &[u8]) {
717        block[range.clone()].fill(0);
718        block[range.start..range.start + value.len()].copy_from_slice(value);
719    }
720
721    async fn next_member<R: AsyncRead + Unpin>(
722        reader: &mut TarReader<R>,
723    ) -> Result<MemberFrame<'_, R>, FrameError> {
724        let Some(member) = reader.next_frame().await? else {
725            panic!("expected logical member");
726        };
727        Ok(member)
728    }
729
730    fn pax_state<'a, R>(member: &'a MemberFrame<'_, R>) -> Option<&'a PaxState<'a>> {
731        if let MemberExtensions::Pax(state) = &member.extensions {
732            Some(state)
733        } else {
734            None
735        }
736    }
737
738    fn member_followed_by_empty_member(payload: &[u8]) -> (Vec<u8>, u64) {
739        let mut bytes = Vec::new();
740        append_pax(&mut bytes, b'0', payload);
741        let next_position = u64::try_from(bytes.len()).expect("test position should fit u64");
742        append_block(&mut bytes, &header(b'0', 0));
743        append_terminator(&mut bytes);
744        (bytes, next_position)
745    }
746
747    #[test]
748    fn exposes_ordinary_header_metadata_and_decodes_modes() {
749        let mut ustar_header = header(b'2', 0);
750        set_field(&mut ustar_header, NAME_RANGE, b"file");
751        set_field(&mut ustar_header, PREFIX_RANGE, b"dir");
752        set_field(&mut ustar_header, LINK_NAME_RANGE, b"target");
753        ustar_header[MODE_RANGE].copy_from_slice(b"0100644\0");
754        ustar_header[UID_RANGE].copy_from_slice(b"0000001\0");
755        ustar_header[GID_RANGE].copy_from_slice(b"0000002\0");
756        ustar_header[MTIME_RANGE].copy_from_slice(b"00000000003\0");
757        set_field(&mut ustar_header, UNAME_RANGE, b"user");
758        set_field(&mut ustar_header, GNAME_RANGE, b"group");
759        set_checksum(&mut ustar_header);
760
761        let mut empty_header = header(b'0', 0);
762        for range in [
763            MODE_RANGE,
764            UID_RANGE,
765            GID_RANGE,
766            MTIME_RANGE,
767            UNAME_RANGE,
768            GNAME_RANGE,
769        ] {
770            empty_header[range].fill(0);
771        }
772        set_checksum(&mut empty_header);
773
774        ready_ok(async {
775            let mut bytes = Vec::new();
776            append_block(&mut bytes, &ustar_header);
777            append_block(&mut bytes, &empty_header);
778            append_terminator(&mut bytes);
779            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
780            {
781                let member = next_member(&mut reader).await?;
782                assert_eq!(member.header.format, ArchiveFormat::Pax);
783                assert_eq!(member.header.header_path, b"dir/file");
784                assert_eq!(member.header.link_name, b"target");
785                assert_eq!(member.header.mode, Some(0o100644));
786                assert_eq!(member.header.uid, Some(1));
787                assert_eq!(member.header.gid, Some(2));
788                assert_eq!(member.header.mtime, Some(3));
789                assert_eq!(member.header.uname, b"user");
790                assert_eq!(member.header.gname, b"group");
791                assert_eq!(member.effective_path()?.as_ref(), b"dir/file");
792                assert_eq!(member.effective_link_path()?.as_ref(), b"target");
793            }
794            let member = next_member(&mut reader).await?;
795            assert_eq!(member.header.mode, None);
796            assert_eq!(member.header.uid, None);
797            assert_eq!(member.header.gid, None);
798            assert_eq!(member.header.mtime, None);
799            assert!(member.header.uname.is_empty());
800            assert!(member.header.gname.is_empty());
801            Ok(())
802        });
803
804        let mut gnu_member_header = gnu_header(b'0', 0);
805        set_field(&mut gnu_member_header, NAME_RANGE, b"name");
806        set_field(&mut gnu_member_header, PREFIX_RANGE, b"ignored");
807        gnu_member_header[MODE_RANGE].fill(0);
808        gnu_member_header[MODE_RANGE.start] = 0x80;
809        gnu_member_header[MODE_RANGE.end - 2..MODE_RANGE.end].copy_from_slice(&[0x81, 0xa4]);
810        set_checksum(&mut gnu_member_header);
811
812        let mut empty_gnu_header = gnu_header(b'0', 0);
813        for range in [MODE_RANGE, UID_RANGE, GID_RANGE, MTIME_RANGE] {
814            empty_gnu_header[range].fill(0);
815        }
816        set_checksum(&mut empty_gnu_header);
817
818        ready_ok(async {
819            let mut bytes = Vec::new();
820            append_block(&mut bytes, &gnu_member_header);
821            append_block(&mut bytes, &empty_gnu_header);
822            append_terminator(&mut bytes);
823            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
824            {
825                let member = next_member(&mut reader).await?;
826                assert_eq!(member.header.format, ArchiveFormat::Gnu);
827                assert_eq!(member.header.header_path, b"name");
828                assert_eq!(member.header.mode, Some(0o100644));
829                assert_eq!(member.header.uid, Some(0));
830                assert_eq!(member.header.gid, Some(0));
831                assert_eq!(member.header.mtime, Some(0));
832            }
833            let member = next_member(&mut reader).await?;
834            assert_eq!(member.header.mode, None);
835            assert_eq!(member.header.uid, None);
836            assert_eq!(member.header.gid, None);
837            assert_eq!(member.header.mtime, None);
838            Ok(())
839        });
840    }
841
842    #[test]
843    fn preserves_ustar_separator_when_name_is_empty() {
844        let mut ustar_header = header(b'5', 0);
845        set_field(&mut ustar_header, NAME_RANGE, b"");
846        set_field(&mut ustar_header, PREFIX_RANGE, b"victim");
847        set_checksum(&mut ustar_header);
848
849        ready_ok(async {
850            let mut bytes = Vec::new();
851            append_block(&mut bytes, &ustar_header);
852            append_terminator(&mut bytes);
853            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
854            let member = next_member(&mut reader).await?;
855            assert_eq!(member.header.header_path, b"victim/");
856            assert_eq!(member.effective_path()?.as_ref(), b"victim/");
857            Ok(())
858        });
859    }
860
861    #[test]
862    fn keeps_borrowed_header_metadata_available_while_streaming_payload() {
863        let mut member_header = header(b'0', 1);
864        set_field(&mut member_header, NAME_RANGE, b"file");
865        set_field(&mut member_header, PREFIX_RANGE, b"dir");
866        set_field(&mut member_header, LINK_NAME_RANGE, b"target");
867        member_header[MODE_RANGE].copy_from_slice(b"0000755\0");
868        set_checksum(&mut member_header);
869
870        ready_ok(async {
871            let mut bytes = Vec::new();
872            append_block(&mut bytes, &member_header);
873            append_payload(&mut bytes, b"x");
874            append_terminator(&mut bytes);
875            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
876            let mut member = next_member(&mut reader).await?;
877
878            assert!(member.payload.next_block().await?.is_some());
879            assert_eq!(member.header.header_path, b"dir/file");
880            assert_eq!(member.header.link_name, b"target");
881            assert_eq!(member.header.mode, Some(0o755));
882            assert_eq!(member.effective_path()?.as_ref(), b"dir/file");
883            assert_eq!(member.effective_link_path()?.as_ref(), b"target");
884            Ok(())
885        });
886    }
887
888    #[test]
889    fn resolves_pax_path_precedence_and_deletions() {
890        let mut global = record("path", "global");
891        global.extend_from_slice(&record("linkpath", "global-link"));
892        let mut local = record("path", "local");
893        local.extend_from_slice(&record("linkpath", ""));
894        let mut bytes = Vec::new();
895        append_pax(&mut bytes, b'g', &global);
896        append_pax(&mut bytes, b'x', &local);
897        append_block(&mut bytes, &header(b'2', 0));
898        append_block(&mut bytes, &header(b'2', 0));
899        append_terminator(&mut bytes);
900
901        ready_ok(async {
902            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
903            {
904                let member = next_member(&mut reader).await?;
905                assert_eq!(member.effective_path()?.as_ref(), b"local");
906                assert!(matches!(
907                    member.effective_link_path(),
908                    Err(FrameError {
909                        position: 2048,
910                        inner: FrameErrorInner::DeletedPaxMetadata {
911                            keyword: "linkpath"
912                        },
913                    })
914                ));
915            }
916            let member = next_member(&mut reader).await?;
917            assert_eq!(member.effective_path()?.as_ref(), b"global");
918            assert_eq!(member.effective_link_path()?.as_ref(), b"global-link");
919            Ok(())
920        });
921    }
922
923    #[test]
924    fn rejects_empty_effective_member_paths() {
925        for (case, mut bytes) in [
926            ("pax-header", {
927                let mut bytes = Vec::new();
928                let mut member = header(b'0', 0);
929                set_field(&mut member, NAME_RANGE, b"");
930                set_field(&mut member, PREFIX_RANGE, b"");
931                set_checksum(&mut member);
932                append_block(&mut bytes, &member);
933                bytes
934            }),
935            ("gnu-header", {
936                let mut bytes = Vec::new();
937                let mut member = gnu_header(b'0', 0);
938                set_field(&mut member, NAME_RANGE, b"");
939                set_checksum(&mut member);
940                append_block(&mut bytes, &member);
941                bytes
942            }),
943            ("gnu-long-name", {
944                let mut bytes = Vec::new();
945                append_gnu(&mut bytes, b'L', b"\0");
946                let mut member = gnu_header(b'0', 0);
947                set_field(&mut member, NAME_RANGE, b"physical");
948                set_checksum(&mut member);
949                append_block(&mut bytes, &member);
950                bytes
951            }),
952        ] {
953            append_terminator(&mut bytes);
954            let result: Result<(), FrameError> = ready(async {
955                let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
956                let member = next_member(&mut reader).await?;
957                member.effective_path().map(|_| ())
958            });
959            assert!(
960                matches!(
961                    result,
962                    Err(FrameError {
963                        inner: FrameErrorInner::EmptyMemberPath,
964                        ..
965                    })
966                ),
967                "{case}: {result:?}"
968            );
969        }
970    }
971
972    #[test]
973    fn rejects_nul_in_effective_member_names() {
974        for (field, mut bytes) in [
975            ("path", {
976                let mut bytes = Vec::new();
977                append_pax(&mut bytes, b'x', &record("path", "bad\0name"));
978                append_block(&mut bytes, &header(b'0', 0));
979                bytes
980            }),
981            ("link path", {
982                let mut bytes = Vec::new();
983                append_pax(&mut bytes, b'x', &record("linkpath", "bad\0target"));
984                append_block(&mut bytes, &header(b'2', 0));
985                bytes
986            }),
987        ] {
988            append_terminator(&mut bytes);
989            let result: Result<(), FrameError> = ready(async {
990                let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
991                let member = next_member(&mut reader).await?;
992                if field == "path" {
993                    member.effective_path().map(|_| ())
994                } else {
995                    member.effective_link_path().map(|_| ())
996                }
997            });
998            assert!(
999                matches!(
1000                    result,
1001                    Err(FrameError {
1002                        inner: FrameErrorInner::NulInMemberName { field: found },
1003                        ..
1004                    }) if found == field
1005                ),
1006                "{field}: {result:?}"
1007            );
1008        }
1009    }
1010
1011    #[test]
1012    fn ignores_nul_in_overridden_pax_member_names() {
1013        let mut global = record("path", "bad\0name");
1014        global.extend_from_slice(&record("linkpath", "bad\0target"));
1015        let mut local = record("path", "good-name");
1016        local.extend_from_slice(&record("linkpath", "good-target"));
1017        let mut bytes = Vec::new();
1018        append_pax(&mut bytes, b'g', &global);
1019        append_pax(&mut bytes, b'x', &local);
1020        append_block(&mut bytes, &header(b'2', 0));
1021        append_terminator(&mut bytes);
1022
1023        ready_ok(async {
1024            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
1025            let member = next_member(&mut reader).await?;
1026            assert_eq!(member.effective_path()?.as_ref(), b"good-name");
1027            assert_eq!(member.effective_link_path()?.as_ref(), b"good-target");
1028            Ok(())
1029        });
1030    }
1031
1032    #[test]
1033    fn accepts_nonempty_extension_paths_over_empty_header_names() {
1034        for (case, mut bytes, expected) in [
1035            (
1036                "pax",
1037                {
1038                    let mut bytes = Vec::new();
1039                    append_pax(&mut bytes, b'x', &record("path", "pax-name"));
1040                    let mut member = header(b'0', 0);
1041                    set_field(&mut member, NAME_RANGE, b"");
1042                    set_field(&mut member, PREFIX_RANGE, b"");
1043                    set_checksum(&mut member);
1044                    append_block(&mut bytes, &member);
1045                    bytes
1046                },
1047                b"pax-name".as_slice(),
1048            ),
1049            (
1050                "gnu",
1051                {
1052                    let mut bytes = Vec::new();
1053                    append_gnu(&mut bytes, b'L', b"gnu-name\0");
1054                    let mut member = gnu_header(b'0', 0);
1055                    set_field(&mut member, NAME_RANGE, b"");
1056                    set_checksum(&mut member);
1057                    append_block(&mut bytes, &member);
1058                    bytes
1059                },
1060                b"gnu-name".as_slice(),
1061            ),
1062        ] {
1063            append_terminator(&mut bytes);
1064            ready_ok(async {
1065                let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
1066                let member = next_member(&mut reader).await?;
1067                assert_eq!(member.effective_path()?.as_ref(), expected, "{case}");
1068                Ok(())
1069            });
1070        }
1071    }
1072
1073    #[test]
1074    fn global_path_deletion_suppresses_the_physical_header_path() {
1075        let mut physical_header = header(b'0', 0);
1076        set_field(&mut physical_header, NAME_RANGE, b"physical");
1077        set_checksum(&mut physical_header);
1078
1079        let mut bytes = Vec::new();
1080        append_pax(&mut bytes, b'g', &record("path", "global"));
1081        append_block(&mut bytes, &header(b'0', 0));
1082        append_pax(&mut bytes, b'g', &record("path", ""));
1083        append_block(&mut bytes, &physical_header);
1084        append_terminator(&mut bytes);
1085
1086        ready_ok(async {
1087            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
1088            {
1089                let member = next_member(&mut reader).await?;
1090                assert_eq!(member.effective_path()?.as_ref(), b"global");
1091            }
1092
1093            let member = next_member(&mut reader).await?;
1094            assert!(matches!(
1095                member.effective_path(),
1096                Err(FrameError {
1097                    inner: FrameErrorInner::DeletedPaxMetadata { keyword: "path" },
1098                    ..
1099                })
1100            ));
1101            let state = pax_state(&member).expect("expected pax member metadata");
1102            assert_eq!(
1103                state.effective_record(&PaxKeyword::Path),
1104                Some(&PaxRecord::Path(PaxValue::Deleted))
1105            );
1106            let extensions = state.extensions().collect::<Vec<_>>();
1107            assert_eq!(extensions.len(), 1);
1108            assert!(matches!(
1109                extensions[0].records(),
1110                [PaxRecord::Path(PaxValue::Deleted)]
1111            ));
1112            Ok(())
1113        });
1114    }
1115
1116    #[test]
1117    fn resolves_and_validates_gnu_metadata_lazily() {
1118        let mut bytes = Vec::new();
1119        append_block(&mut bytes, &gnu_header(b'L', 5));
1120        append_payload(&mut bytes, b"name\0");
1121        append_block(&mut bytes, &gnu_header(b'K', 5));
1122        append_payload(&mut bytes, b"link\0");
1123        append_block(&mut bytes, &gnu_header(b'2', 0));
1124        append_terminator(&mut bytes);
1125
1126        ready_ok(async {
1127            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
1128            let member = next_member(&mut reader).await?;
1129            assert_eq!(member.effective_path()?.as_ref(), b"name");
1130            assert_eq!(member.effective_link_path()?.as_ref(), b"link");
1131            Ok(())
1132        });
1133
1134        for (typeflag, payload, kind) in [
1135            (b'L', b"no-nul".as_slice(), GnuKind::LongName),
1136            (b'K', b"link\0bad".as_slice(), GnuKind::LongLink),
1137        ] {
1138            let mut bytes = Vec::new();
1139            append_gnu(&mut bytes, typeflag, payload);
1140            append_block(&mut bytes, &gnu_header(b'2', 0));
1141            append_terminator(&mut bytes);
1142            let result: Result<(), FrameError> = ready(async {
1143                let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
1144                let member = next_member(&mut reader).await?;
1145                match kind {
1146                    GnuKind::LongName => member.effective_path().map(|_| ()),
1147                    GnuKind::LongLink => member.effective_link_path().map(|_| ()),
1148                }
1149            });
1150            assert!(matches!(
1151                result,
1152                Err(FrameError {
1153                    position: 0,
1154                    inner: FrameErrorInner::InvalidGnuMetadata { kind: found, .. },
1155                }) if found == kind
1156            ));
1157        }
1158    }
1159
1160    #[test]
1161    fn groups_pax_metadata_and_streams_member_payload() {
1162        let mut global = record("comment", "first");
1163        global.extend_from_slice(&record("comment", "last"));
1164        let mut local = record("path", "renamed");
1165        local.extend_from_slice(&record("size", "513"));
1166        let mut bytes = Vec::new();
1167        append_pax(&mut bytes, b'g', &global);
1168        append_pax(&mut bytes, b'x', &local);
1169        append_block(&mut bytes, &header(b'0', 1));
1170        append_payload(&mut bytes, &[b'a'; BLOCK_SIZE]);
1171        append_payload(&mut bytes, b"b");
1172        append_terminator(&mut bytes);
1173
1174        ready_ok(async {
1175            let mut reader = TarReader::new(ChunkedReader::new(bytes, 17));
1176            {
1177                let mut member = next_member(&mut reader).await?;
1178                assert_eq!(member.header.effective_size, 513);
1179                let state = pax_state(&member).expect("expected pax member metadata");
1180                let extensions = state.extensions().collect::<Vec<_>>();
1181                assert_eq!(extensions.len(), 2);
1182                assert_eq!(extensions[0].position, 0);
1183                assert_eq!(extensions[0].kind, PaxKind::Global);
1184                assert_eq!(
1185                    extensions[0].records(),
1186                    [
1187                        PaxRecord::Comment(PaxValue::Value("first".into())),
1188                        PaxRecord::Comment(PaxValue::Value("last".into())),
1189                    ]
1190                );
1191                assert_eq!(extensions[1].position, (BLOCK_SIZE * 2) as u64);
1192                assert_eq!(extensions[1].kind, PaxKind::Local);
1193                assert_eq!(
1194                    state.effective_record(&PaxKeyword::Size),
1195                    Some(&PaxRecord::Size(PaxValue::Value(513)))
1196                );
1197                assert_eq!(
1198                    state.effective_record(&PaxKeyword::Comment),
1199                    Some(&PaxRecord::Comment(PaxValue::Value("last".into())))
1200                );
1201                let Some(first) = member.payload.next_block().await? else {
1202                    panic!("expected first member payload block");
1203                };
1204                let Some(last) = member.payload.next_block().await? else {
1205                    panic!("expected last member payload block");
1206                };
1207                assert_eq!(first.len, BLOCK_SIZE);
1208                assert_eq!(last.len, 1);
1209                assert!(member.payload.next_block().await?.is_none());
1210            }
1211            assert!(reader.next_frame().await?.is_none());
1212            Ok(())
1213        });
1214    }
1215
1216    #[test]
1217    fn bounds_cumulative_global_pax_extension_payloads() {
1218        let payload = record("comment", "metadata");
1219        let payload_size = u64::try_from(payload.len()).expect("payload size should fit u64");
1220        let limit = payload_size
1221            .checked_mul(2)
1222            .expect("test payload total should fit u64");
1223
1224        let mut rejected = Vec::new();
1225        append_pax(&mut rejected, b'g', &payload);
1226        append_pax(&mut rejected, b'g', &payload);
1227        let rejected_position =
1228            u64::try_from(rejected.len()).expect("test position should fit u64");
1229        append_block(&mut rejected, &header(b'g', payload_size));
1230        let error: Result<(), FrameError> = ready(async {
1231            let mut reader = TarReader::new(ChunkedReader::new(rejected, BLOCK_SIZE));
1232            reader.set_max_global_pax_extensions_size(limit);
1233            reader.next_frame().await.map(|_| ())
1234        });
1235        assert!(matches!(
1236            error,
1237            Err(FrameError {
1238                position,
1239                inner: FrameErrorInner::GlobalPaxExtensionsTooLarge {
1240                    size,
1241                    limit: found_limit,
1242                },
1243            }) if position == rejected_position
1244                && size == payload_size * 3
1245                && found_limit == limit
1246        ));
1247
1248        let mut accepted = Vec::new();
1249        for _ in 0..2 {
1250            for _ in 0..3 {
1251                append_pax(&mut accepted, b'g', &payload);
1252            }
1253            append_block(&mut accepted, &header(b'0', 0));
1254        }
1255        append_terminator(&mut accepted);
1256        ready_ok(async {
1257            let mut reader = TarReader::new(ChunkedReader::new(accepted, BLOCK_SIZE));
1258            reader.set_max_global_pax_extensions_size(payload_size * 3);
1259            for _ in 0..2 {
1260                let member = next_member(&mut reader).await?;
1261                assert_eq!(
1262                    pax_state(&member)
1263                        .expect("expected pax member metadata")
1264                        .extensions()
1265                        .count(),
1266                    3
1267                );
1268            }
1269            Ok(())
1270        });
1271    }
1272
1273    #[test]
1274    fn retains_global_pax_extension_across_cancelled_reads() {
1275        let mut bytes = Vec::new();
1276        append_pax(&mut bytes, b'g', &record("comment", "metadata"));
1277        let after_extension_header = BLOCK_SIZE;
1278        let after_extension_payload = bytes.len();
1279        append_block(&mut bytes, &header(b'0', 0));
1280        append_terminator(&mut bytes);
1281
1282        for pending_at in [after_extension_header, after_extension_payload] {
1283            let mut reader = TarReader::new(ChunkedReader::pending_once(bytes.clone(), pending_at));
1284            cancel_pending(reader.next_frame());
1285
1286            ready_ok(async {
1287                let member = next_member(&mut reader).await?;
1288                let state = pax_state(&member).expect("expected pax member metadata");
1289                let extensions = state.extensions().collect::<Vec<_>>();
1290                assert_eq!(extensions.len(), 1);
1291                assert_eq!(extensions[0].position, 0);
1292                assert_eq!(extensions[0].kind, PaxKind::Global);
1293                assert_eq!(
1294                    extensions[0].records(),
1295                    &[PaxRecord::Comment(PaxValue::Value("metadata".into()))]
1296                );
1297                Ok(())
1298            });
1299        }
1300    }
1301
1302    #[test]
1303    fn retains_gnu_metadata_across_cancelled_reads() {
1304        let expected_name = vec![b'n'; BLOCK_SIZE + 10];
1305        let mut long_name = expected_name.clone();
1306        long_name.push(0);
1307
1308        let mut bytes = Vec::new();
1309        append_gnu(&mut bytes, b'L', &long_name);
1310        let after_first_payload_block = BLOCK_SIZE * 2;
1311        append_block(&mut bytes, &gnu_header(b'0', 0));
1312        append_terminator(&mut bytes);
1313
1314        let mut reader = TarReader::new(ChunkedReader::pending_once(
1315            bytes,
1316            after_first_payload_block,
1317        ));
1318        cancel_pending(reader.next_frame());
1319
1320        ready_ok(async {
1321            let member = next_member(&mut reader).await?;
1322            assert_eq!(member.effective_path()?.as_ref(), expected_name);
1323            Ok(())
1324        });
1325
1326        let mut bytes = Vec::new();
1327        append_gnu(&mut bytes, b'L', &[]);
1328        let after_extension_header = bytes.len();
1329        append_block(&mut bytes, &gnu_header(b'0', 0));
1330        append_terminator(&mut bytes);
1331        let mut reader = TarReader::new(ChunkedReader::pending_once(bytes, after_extension_header));
1332        cancel_pending(reader.next_frame());
1333
1334        ready_ok(async {
1335            let member = next_member(&mut reader).await?;
1336            assert!(matches!(
1337                &member.extensions,
1338                MemberExtensions::Gnu {
1339                    long_name: Some(GnuMetadata { payload, .. }),
1340                    ..
1341                } if payload.is_empty()
1342            ));
1343            Ok(())
1344        });
1345    }
1346
1347    #[test]
1348    fn applies_global_pax_updates_to_each_borrowed_state() {
1349        let first = record("comment", "first");
1350        let second = record("gname", "second");
1351        let replacement = record("comment", "replacement");
1352        let mut bytes = Vec::new();
1353        append_pax(&mut bytes, b'g', &first);
1354        append_pax(&mut bytes, b'g', &second);
1355        append_block(&mut bytes, &header(b'0', 0));
1356        append_block(&mut bytes, &header(b'0', 0));
1357        append_pax(&mut bytes, b'g', &replacement);
1358        append_block(&mut bytes, &header(b'0', 0));
1359        append_terminator(&mut bytes);
1360
1361        ready_ok(async {
1362            let mut reader = TarReader::new(ChunkedReader::new(bytes, BLOCK_SIZE));
1363            {
1364                let member = next_member(&mut reader).await?;
1365                let state = pax_state(&member).expect("expected pax member metadata");
1366                let extensions = state.extensions().collect::<Vec<_>>();
1367                assert_eq!(extensions.len(), 2);
1368                assert_eq!(extensions[0].position, 0);
1369                assert_eq!(extensions[1].position, (BLOCK_SIZE * 2) as u64);
1370                assert_eq!(
1371                    state.effective_record(&PaxKeyword::Comment),
1372                    Some(&PaxRecord::Comment(PaxValue::Value("first".into())))
1373                );
1374            }
1375            {
1376                let member = next_member(&mut reader).await?;
1377                let state = pax_state(&member).expect("expected pax member metadata");
1378                assert_eq!(state.extensions().count(), 0);
1379                assert_eq!(
1380                    state.effective_record(&PaxKeyword::Comment),
1381                    Some(&PaxRecord::Comment(PaxValue::Value("first".into())))
1382                );
1383            }
1384
1385            let member = next_member(&mut reader).await?;
1386            let state = pax_state(&member).expect("expected pax member metadata");
1387            let extensions = state.extensions().collect::<Vec<_>>();
1388            assert_eq!(extensions.len(), 1);
1389            assert_eq!(extensions[0].kind, PaxKind::Global);
1390            assert_eq!(
1391                state.effective_record(&PaxKeyword::Comment),
1392                Some(&PaxRecord::Comment(PaxValue::Value("replacement".into())))
1393            );
1394            Ok(())
1395        });
1396    }
1397
1398    #[test]
1399    fn streams_member_payload_in_reusable_chunks() {
1400        let payload = (0..BLOCK_SIZE * 3 + 7)
1401            .map(|index| u8::try_from(index % 251).unwrap())
1402            .collect::<Vec<_>>();
1403        let mut bytes = Vec::new();
1404        append_pax(&mut bytes, b'0', &payload);
1405        append_terminator(&mut bytes);
1406
1407        ready_ok(async {
1408            let mut reader = TarReader::new(ChunkedReader::new(bytes, 17));
1409            let mut member = next_member(&mut reader).await?;
1410            let mut chunk = vec![b'x'; BLOCK_SIZE * 2];
1411            assert!(
1412                member
1413                    .payload
1414                    .next_chunk(&mut chunk, BLOCK_SIZE + 1)
1415                    .await?
1416            );
1417            let allocation = chunk.as_ptr();
1418            assert_eq!(chunk, payload[..BLOCK_SIZE * 2]);
1419            assert!(
1420                member
1421                    .payload
1422                    .next_chunk(&mut chunk, BLOCK_SIZE + 1)
1423                    .await?
1424            );
1425            assert_eq!(chunk.as_ptr(), allocation);
1426            assert_eq!(chunk, payload[BLOCK_SIZE * 2..]);
1427            assert!(
1428                !member
1429                    .payload
1430                    .next_chunk(&mut chunk, BLOCK_SIZE + 1)
1431                    .await?
1432            );
1433            assert_eq!(chunk, payload[BLOCK_SIZE * 2..]);
1434            assert!(reader.next_frame().await?.is_none());
1435            Ok(())
1436        });
1437    }
1438
1439    #[test]
1440    fn resumes_cancelled_member_payload_chunk_with_either_read_api() {
1441        let payload = (0..BLOCK_SIZE * 2 + 17)
1442            .map(|index| u8::try_from(index % 251).expect("test byte should fit"))
1443            .collect::<Vec<_>>();
1444        let (bytes, next_member_position) = member_followed_by_empty_member(&payload);
1445
1446        ready_ok(async {
1447            let mut reader = TarReader::new(ChunkedReader::pending_once(bytes, BLOCK_SIZE + 73));
1448            {
1449                let mut member = next_member(&mut reader).await?;
1450                let mut cancelled_buffer = vec![b'x'; 17];
1451                cancel_pending(
1452                    member
1453                        .payload
1454                        .next_chunk(&mut cancelled_buffer, payload.len()),
1455                );
1456                assert!(cancelled_buffer.is_empty());
1457
1458                let first = member
1459                    .payload
1460                    .next_block()
1461                    .await?
1462                    .expect("cancelled chunk should resume as a payload block");
1463                let mut resumed_buffer = vec![b'y'; 23];
1464                assert!(member.payload.next_chunk(&mut resumed_buffer, 1).await?);
1465                let mut observed = first.block[..first.len].to_vec();
1466                observed.extend_from_slice(&resumed_buffer);
1467                assert_eq!(observed, payload);
1468                assert!(!member.payload.next_chunk(&mut resumed_buffer, 1).await?);
1469            }
1470
1471            let member = next_member(&mut reader).await?;
1472            assert_eq!(member.header.position, next_member_position);
1473            Ok(())
1474        });
1475    }
1476
1477    #[test]
1478    fn resumes_cancelled_member_payload_block_during_automatic_drain() {
1479        let payload = vec![b'x'; BLOCK_SIZE * 2 + 17];
1480        let (bytes, next_member_position) = member_followed_by_empty_member(&payload);
1481
1482        ready_ok(async {
1483            let mut reader = TarReader::new(ChunkedReader::pending_once(bytes, BLOCK_SIZE + 73));
1484            {
1485                let mut member = next_member(&mut reader).await?;
1486                cancel_pending(member.payload.next_block());
1487            }
1488
1489            let member = next_member(&mut reader).await?;
1490            assert_eq!(member.header.position, next_member_position);
1491            drop(member);
1492            assert!(reader.next_frame().await?.is_none());
1493            Ok(())
1494        });
1495    }
1496
/// Releases the first member with its payload unread, hands the next
/// `next_frame()` future to `cancel_pending`, and checks that a fresh call
/// still lands on the following member at the expected position.
#[test]
fn resumes_cancelled_automatic_payload_drain() {
    let contents = vec![b'x'; BLOCK_SIZE * 2 + 17];
    let (archive, expected_position) = member_followed_by_empty_member(&contents);

    ready_ok(async {
        let source = ChunkedReader::pending_once(archive, BLOCK_SIZE + 73);
        let mut reader = TarReader::new(source);

        // Let go of the first member without touching its payload.
        {
            let _unread = next_member(&mut reader).await?;
        }
        // NOTE(review): `cancel_pending` presumably drops the future while it
        // is still pending -- confirm against the helper.
        cancel_pending(reader.next_frame());

        {
            let following = next_member(&mut reader).await?;
            assert_eq!(following.header.position, expected_position);
        }
        assert!(reader.next_frame().await?.is_none());
        Ok(())
    });
}
1514
/// Retries a `next_chunk()` call after an earlier one was handed to
/// `cancel_pending`, on an archive whose payload ends early, and checks the
/// reported error position and kind. Two inputs are covered: one that simply
/// stops, and one with a single extra byte after the appended payload.
#[test]
fn reports_cancelled_chunk_errors_at_physical_block_boundaries() {
    #[derive(Clone, Copy, Debug)]
    enum ExpectedError {
        TruncatedPayload,
        IncompleteBlock,
    }

    let cases = [
        (ExpectedError::TruncatedPayload, None),
        (ExpectedError::IncompleteBlock, Some(b'x')),
    ];
    for (expected, stray_byte) in cases {
        // The header is built with `BLOCK_SIZE + 1` while only `b"payload"`
        // is appended after it.
        let mut archive = Vec::new();
        append_block(&mut archive, &header(b'0', (BLOCK_SIZE + 1) as u64));
        append_payload(&mut archive, b"payload");
        // Adds the stray byte only for the case that carries one.
        archive.extend(stray_byte);

        let error = ready(async {
            let source = ChunkedReader::pending_once(archive, BLOCK_SIZE + 73);
            let mut reader = TarReader::new(source);
            let mut member = match reader.next_frame().await {
                Ok(Some(member)) => member,
                _ => panic!("expected member"),
            };
            let mut chunk = Vec::new();
            cancel_pending(member.payload.next_chunk(&mut chunk, BLOCK_SIZE * 2));
            member.payload.next_chunk(&mut chunk, BLOCK_SIZE * 2).await
        });

        let (position, inner) = match &error {
            Err(FrameError { position, inner }) => (position, inner),
            _ => panic!("{expected:?}: expected error, got {error:?}"),
        };
        assert_eq!(*position, (BLOCK_SIZE * 2) as u64, "{expected:?}");

        let has_expected_kind = match expected {
            ExpectedError::TruncatedPayload => matches!(
                inner,
                FrameErrorInner::TruncatedPayload {
                    owner: DataOwner::Member,
                    remaining: 1,
                }
            ),
            ExpectedError::IncompleteBlock => {
                matches!(inner, FrameErrorInner::IncompleteBlock { read: 1 })
            }
        };
        assert!(has_expected_kind, "{expected:?}: {error:?}");
    }
}
1565
/// Builds a GNU `L` entry and a GNU `K` entry followed by one ordinary
/// member, and checks that the member returned by the reader carries both
/// payloads as its GNU extensions and has no payload blocks of its own.
#[test]
fn groups_gnu_metadata_with_its_member() {
    let mut archive = Vec::new();
    for (typeflag, value) in [(b'L', b"name\0"), (b'K', b"link\0")] {
        append_block(&mut archive, &gnu_header(typeflag, 5));
        append_payload(&mut archive, value);
    }
    append_block(&mut archive, &gnu_header(b'2', 0));
    append_terminator(&mut archive);

    ready_ok(async {
        let mut reader = TarReader::new(ChunkedReader::new(archive, BLOCK_SIZE));
        let mut member = next_member(&mut reader).await?;

        match &member.extensions {
            MemberExtensions::Gnu {
                long_name: Some(name),
                long_link: Some(link),
            } => {
                assert_eq!(name.payload, b"name\0");
                assert_eq!(link.payload, b"link\0");
            }
            _ => panic!("expected GNU extensions"),
        }

        assert!(member.payload.next_block().await?.is_none());
        Ok(())
    });
}
1592
/// Feeds the reader a lone GNU extension header whose declared size exceeds
/// the limit and checks that `next_frame()` fails with `ExtensionTooLarge` at
/// position 0. The input holds only the header block, so no payload bytes
/// exist to be read. Covers an explicitly configured limit for both `L` and
/// `K`, then the default limit.
#[test]
fn rejects_oversized_gnu_extensions_before_consuming_payload() {
    let declared_size = 9;
    let configured_limit = declared_size - 1;

    for (case, typeflag) in [("long-name", b'L'), ("long-link", b'K')] {
        let header_only = gnu_header(typeflag, declared_size).to_vec();
        let mut reader = TarReader::new(ChunkedReader::new(header_only, BLOCK_SIZE));
        reader.set_max_gnu_extension_size(configured_limit);

        let rejected = matches!(
            ready(reader.next_frame()),
            Err(FrameError {
                position: 0,
                inner: FrameErrorInner::ExtensionTooLarge {
                    format: ArchiveFormat::Gnu,
                    size,
                    limit,
                },
            }) if size == declared_size && limit == configured_limit
        );
        assert!(rejected, "{case}");
    }

    // No limit is configured below, so the reported limit must be
    // `DEFAULT_MAX_GNU_EXTENSION_SIZE`.
    let oversized = DEFAULT_MAX_GNU_EXTENSION_SIZE + 1;
    let header_only = gnu_header(b'L', oversized).to_vec();
    let mut reader = TarReader::new(ChunkedReader::new(header_only, BLOCK_SIZE));
    let rejected_by_default = matches!(
        ready(reader.next_frame()),
        Err(FrameError {
            position: 0,
            inner: FrameErrorInner::ExtensionTooLarge {
                format: ArchiveFormat::Gnu,
                size,
                limit: DEFAULT_MAX_GNU_EXTENSION_SIZE,
            },
        }) if size == oversized
    );
    assert!(rejected_by_default);
}
1634
/// After `next_frame()` rejects an oversized GNU long-name entry, a second
/// call must return `Ok(None)` even though the archive still contains an
/// ordinary member and a terminator.
#[test]
fn logical_reader_is_fused_after_oversized_gnu_extension() {
    let long_name = b"renamed\0";
    let payload_size = u64::try_from(long_name.len()).expect("payload size should fit u64");
    // One byte short of the payload, so the extension cannot fit.
    let too_small_limit = payload_size - 1;

    let mut archive = Vec::new();
    append_gnu(&mut archive, b'L', long_name);
    append_block(&mut archive, &gnu_header(b'0', 0));
    append_terminator(&mut archive);

    ready_ok(async {
        let mut reader = TarReader::new(ChunkedReader::new(archive, BLOCK_SIZE));
        reader.set_max_gnu_extension_size(too_small_limit);

        let first_call_rejected = matches!(
            reader.next_frame().await,
            Err(FrameError {
                position: 0,
                inner: FrameErrorInner::ExtensionTooLarge {
                    format: ArchiveFormat::Gnu,
                    size,
                    limit,
                },
            }) if size == payload_size && limit == too_small_limit
        );
        assert!(first_call_rejected);

        assert!(reader.next_frame().await?.is_none());
        Ok(())
    });
}
1662
/// Checks that GNU long-name and long-link payloads longer than one block
/// come back byte-for-byte, together with the position of each extension
/// header, when the underlying reader is constructed with a chunk argument
/// of 19.
#[test]
fn preserves_multiblock_gnu_metadata_payloads() {
    // Produces `len` copies of `fill` followed by a single NUL byte.
    let nul_terminated = |fill: u8, len: usize| {
        let mut value = vec![fill; len];
        value.push(0);
        value
    };
    let long_name = nul_terminated(b'n', BLOCK_SIZE * 2 + 37);
    let long_link = nul_terminated(b'l', BLOCK_SIZE + 19);

    let mut archive = Vec::new();
    append_gnu(&mut archive, b'L', &long_name);
    append_gnu(&mut archive, b'K', &long_link);
    append_block(&mut archive, &gnu_header(b'2', 0));
    append_terminator(&mut archive);

    ready_ok(async {
        let mut reader = TarReader::new(ChunkedReader::new(archive, 19));
        let member = next_member(&mut reader).await?;

        match &member.extensions {
            MemberExtensions::Gnu {
                long_name: Some(name_metadata),
                long_link: Some(link_metadata),
            } => {
                assert_eq!(name_metadata.position, 0);
                assert_eq!(name_metadata.payload, long_name);
                assert_eq!(link_metadata.position, (BLOCK_SIZE * 4) as u64);
                assert_eq!(link_metadata.payload, long_link);
            }
            _ => panic!("expected GNU extensions"),
        }

        member.payload.skip().await?;
        assert!(reader.next_frame().await?.is_none());
        Ok(())
    });
}
1695
/// Exercises archives that contain no ordinary member:
///
/// * a terminator alone yields `Ok(None)`;
/// * a pax `x` entry or a GNU `L` header with nothing after it fails with
///   `UnexpectedEof`;
/// * pax `g` entries followed by a terminator yield `Ok(None)`;
/// * a pax `g` entry whose payload is `b"invalid"` fails with
///   `InvalidPaxRecord` at position 0.
#[test]
fn handles_empty_archives_and_trailing_global_pax() {
    let mut terminator_only = Vec::new();
    append_terminator(&mut terminator_only);
    ready_ok(async {
        let mut reader = TarReader::new(ChunkedReader::new(terminator_only, BLOCK_SIZE));
        assert!(reader.next_frame().await?.is_none());
        Ok(())
    });

    let path_record = record("path", "name");
    let dangling_extensions = [
        header(b'x', path_record.len() as u64),
        gnu_header(b'L', 0),
    ];
    for extension_header in dangling_extensions {
        let mut archive = Vec::new();
        append_block(&mut archive, &extension_header);
        // Only the pax entry gets a payload; the GNU header is built with 0.
        if extension_header[TYPEFLAG_OFFSET] == b'x' {
            append_payload(&mut archive, &path_record);
        }

        let outcome: Result<(), FrameError> = ready(async {
            let mut reader = TarReader::new(ChunkedReader::new(archive, BLOCK_SIZE));
            reader.next_frame().await.map(drop)
        });
        let hit_unexpected_eof = matches!(
            outcome,
            Err(FrameError {
                inner: FrameErrorInner::UnexpectedEof { .. },
                ..
            })
        );
        assert!(hit_unexpected_eof);
    }

    let mut globals_only = Vec::new();
    for (keyword, value) in [("comment", "metadata"), ("gname", "group")] {
        append_pax(&mut globals_only, b'g', &record(keyword, value));
    }
    append_terminator(&mut globals_only);
    ready_ok(async {
        let mut reader = TarReader::new(ChunkedReader::new(globals_only, BLOCK_SIZE));
        assert!(reader.next_frame().await?.is_none());
        Ok(())
    });

    let mut malformed_global = Vec::new();
    append_pax(&mut malformed_global, b'g', b"invalid");
    append_terminator(&mut malformed_global);
    let outcome: Result<(), FrameError> = ready(async {
        let mut reader = TarReader::new(ChunkedReader::new(malformed_global, BLOCK_SIZE));
        reader.next_frame().await.map(drop)
    });
    let rejected_record = matches!(
        outcome,
        Err(FrameError {
            position: 0,
            inner: FrameErrorInner::InvalidPaxRecord { .. },
        })
    );
    assert!(rejected_record);
}
1753
/// Checks that the reader reaches the next member after a payload is skipped
/// explicitly with `skip()` (for two payload lengths) and after a member is
/// simply dropped with its payload unread.
#[test]
fn skips_unread_payload_before_advancing() {
    let payload_lengths = [BLOCK_SIZE + 1, PAYLOAD_DRAIN_CHUNK_BYTES + 7];
    for length in payload_lengths {
        let unread = vec![b'a'; length];
        let mut archive = Vec::new();
        append_pax(&mut archive, b'0', &unread);
        append_block(&mut archive, &header(b'0', 0));
        append_terminator(&mut archive);

        ready_ok(async {
            let mut reader = TarReader::new(ChunkedReader::new(archive, BLOCK_SIZE));

            // Scoped so the first member is gone before the reader is used
            // again.
            {
                let skipped = next_member(&mut reader).await?;
                skipped.payload.skip().await?;
            }

            {
                let following = next_member(&mut reader).await?;
                assert_eq!(following.header.effective_size, 0);
            }
            assert!(reader.next_frame().await?.is_none());
            Ok(())
        });
    }

    // Same idea without calling `skip()`: the member is dropped untouched.
    let mut auto_archive = Vec::new();
    append_block(&mut auto_archive, &header(b'0', 1));
    append_payload(&mut auto_archive, b"a");
    append_block(&mut auto_archive, &header(b'0', 0));
    append_terminator(&mut auto_archive);

    ready_ok(async {
        let mut reader = TarReader::new(ChunkedReader::new(auto_archive, BLOCK_SIZE));
        drop(next_member(&mut reader).await?);
        assert!(reader.next_frame().await?.is_some());
        Ok(())
    });
}
1790
/// Feeds the reader a single header built with `header(b'0', 1)` and nothing
/// after it, then checks that reading the payload, skipping it explicitly,
/// and dropping the member before the next `next_frame()` call each report
/// `TruncatedPayload` owned by the member.
#[test]
fn reports_truncated_payload_when_read_or_skipped() {
    #[derive(Clone, Copy, Debug)]
    enum Operation {
        Read,
        ExplicitSkip,
        AutomaticSkip,
    }

    let operations = [
        Operation::Read,
        Operation::ExplicitSkip,
        Operation::AutomaticSkip,
    ];
    for operation in operations {
        let result: Result<(), FrameError> = ready(async {
            let header_only = header(b'0', 1).to_vec();
            let mut reader = TarReader::new(ChunkedReader::new(header_only, BLOCK_SIZE));
            let mut member = match reader.next_frame().await {
                Ok(Some(member)) => member,
                _ => panic!("expected member"),
            };

            match operation {
                Operation::AutomaticSkip => {
                    // Drop the member untouched and let the next call deal
                    // with the payload.
                    drop(member);
                    reader.next_frame().await.map(drop)
                }
                Operation::ExplicitSkip => member.payload.skip().await,
                Operation::Read => member.payload.next_block().await.map(drop),
            }
        });

        let truncated_for_member = matches!(
            result,
            Err(FrameError {
                inner: FrameErrorInner::TruncatedPayload {
                    owner: DataOwner::Member,
                    ..
                },
                ..
            })
        );
        assert!(truncated_for_member, "{operation:?}");
    }
}
1835}