Skip to main content

tar_framing/
write.rs

1//! Strict POSIX-pax block construction.
2//!
3//! This module builds deterministic pax framing blocks without performing I/O.
4//! Higher-level crates remain responsible for writing payload bytes and for
5//! deciding which filesystem entries are appropriate to archive.
6
7use crate::{
8    BLOCK_SIZE, Block, PaxKeyword, UstarKind,
9    header::{
10        DEVMAJOR_RANGE, DEVMINOR_RANGE, GID_RANGE, IDENTITY_RANGE, LINK_NAME_RANGE, MODE_RANGE,
11        MTIME_RANGE, NAME_RANGE, PREFIX_RANGE, SIZE_RANGE, TYPEFLAG_OFFSET, UID_RANGE,
12        USTAR_IDENTITY, encode_checksum, encode_octal,
13    },
14};
15
/// The length in bytes of the longest decimal string representation of a `u64`.
/// `u64::MAX` is `18446744073709551615`, which is 20 digits long.
const MAX_DECIMAL_U64_BYTES: usize = 20;
// Compile-time proof that the constant matches the digit count of `u64::MAX`.
const _: () = assert!(MAX_DECIMAL_U64_BYTES == (u64::MAX.ilog10() as usize) + 1);

/// Capacity needed for a generated sequence name: the `PaxHeaders/` prefix
/// followed by the longest possible decimal `u64`.
const MAX_SEQUENCE_NAME_BYTES: usize = b"PaxHeaders/".len() + MAX_DECIMAL_U64_BYTES;
22
/// Concatenated, zero-padded decimal representations of 0 through 99. The pair
/// for `value` starts at offset `value * 2`.
///
/// This gives us a very cheap LUT for [`decimal_u64_large`], the slow path of
/// [`decimal_u64`].
const DECIMAL_PAIRS: &[u8; 200] = b"\
    0001020304050607080910111213141516171819\
    2021222324252627282930313233343536373839\
    4041424344454647484950515253545556575859\
    6061626364656667686970717273747576777879\
    8081828384858687888990919293949596979899";
/// One all-zero block; [`payload_padding`] returns prefixes of it.
const ZERO_BLOCK: Block = [0; BLOCK_SIZE];
/// Two consecutive all-zero blocks, returned by [`end_marker_bytes`].
const END_MARKER_BYTES: [u8; BLOCK_SIZE * 2] = [0; BLOCK_SIZE * 2];
35
/// Metadata needed to frame one supported pax archive member.
///
/// [`frame_pax_member_into`] validates these fields before encoding anything.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct PaxMember<'a> {
    /// The UTF-8 member path written into the local pax header.
    ///
    /// The path must be non-empty and free of NUL bytes. For non-directory
    /// members it cannot end in `/`, and its final component cannot be `.` or
    /// `..`.
    pub path: &'a str,
    /// The supported ordinary member kind.
    ///
    /// Only regular files, directories, and symbolic links are encoded; any
    /// other kind is rejected as unsupported.
    pub kind: UstarKind,
    /// The meaningful regular-file payload size.
    ///
    /// Must be zero for directories and symbolic links.
    pub size: u64,
    /// The UTF-8 symbolic-link target, when `kind` is [`UstarKind::SymbolicLink`].
    ///
    /// Symbolic links require a non-empty, NUL-free target; regular files and
    /// directories must leave this as `None`.
    pub link_path: Option<&'a str>,
    /// Whether a regular file should carry executable intent.
    ///
    /// Selects mode `0o755` instead of `0o644` for regular files; other kinds
    /// use a fixed mode and do not read this flag.
    pub executable: bool,
}
52
/// A failure while constructing strict pax framing blocks.
///
/// Every variant is a plain value (no I/O errors are wrapped), so errors can
/// be cloned and compared for equality.
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum FramingWriteError {
    /// The requested ordinary member kind is outside the encoder subset.
    ///
    /// Only regular files, directories, and symbolic links are accepted.
    #[error("cannot encode unsupported member type {kind:?}")]
    UnsupportedMemberKind {
        /// The rejected ordinary member kind.
        kind: UstarKind,
    },
    /// A member kind that cannot carry data was assigned a nonzero payload size.
    #[error("member type {kind:?} cannot carry payload size {size}")]
    InvalidMemberSize {
        /// The affected member kind.
        kind: UstarKind,
        /// The rejected payload size.
        size: u64,
    },
    /// A symbolic link was missing its required target.
    #[error("symbolic-link member is missing its link path")]
    MissingLinkPath,
    /// A non-symbolic-link member unexpectedly supplied a link target.
    #[error("member type {kind:?} cannot carry a link path")]
    UnexpectedLinkPath {
        /// The affected member kind.
        kind: UstarKind,
    },
    /// A required text value is empty or contains a NUL byte.
    #[error("invalid pax {field}: values must be non-empty and cannot contain NUL bytes")]
    InvalidText {
        /// The affected metadata field (`"path"` or `"linkpath"`).
        field: &'static str,
    },
    /// A PAX record keyword is empty or contains `=`.
    ///
    /// For two-part keywords the check applies to each part separately.
    #[error("pax record keywords must be non-empty and cannot contain '='")]
    InvalidPaxRecordKeyword,
    /// A non-directory member path has a suffix that requires a directory.
    ///
    /// That is: a trailing `/`, or a final `.` or `..` component.
    #[error("member type {kind:?} cannot have a directory-required path suffix")]
    DirectoryRequiredPathSuffix {
        /// The affected member kind.
        kind: UstarKind,
    },
    /// The local pax extended header payload cannot fit its ustar size field.
    #[error("pax extended header payload is too large: {size} bytes")]
    ExtendedHeaderTooLarge {
        /// The unpadded local pax payload size.
        size: u64,
    },
    /// An internal length computation exceeded its framing range.
    ///
    /// Also reported when a header slice handed to the header builder is not
    /// exactly one block long.
    #[error("arithmetic overflow while constructing {context}")]
    ArithmeticOverflow {
        /// The failed framing computation.
        context: &'static str,
    },
}
107
108/// Appends one PAX extended-header record without block padding to `output`.
109///
110/// `keyword` must be nonempty and cannot contain `=`. `value` is copied
111/// verbatim and may contain arbitrary bytes.
112pub fn append_pax_record(
113    output: &mut Vec<u8>,
114    keyword: &PaxKeyword,
115    value: &[u8],
116) -> Result<(), FramingWriteError> {
117    let (namespace, name) = keyword.components();
118    if namespace.is_empty()
119        || namespace.contains('=')
120        || name.is_some_and(|name| name.is_empty() || name.contains('='))
121    {
122        return Err(FramingWriteError::InvalidPaxRecordKeyword);
123    }
124    let len = record_len(keyword, value)?;
125    output
126        .len()
127        .checked_add(len)
128        .ok_or(FramingWriteError::ArithmeticOverflow {
129            context: "pax record output length",
130        })?;
131    output.reserve(len);
132    append_record_with_len(output, keyword, value, len);
133    Ok(())
134}
135
/// Writes one local pax header and its ordinary member header into `buffer`.
///
/// The buffer is cleared first and its allocation is reused when possible.
/// The resulting bytes contain the local extended header block, its padded
/// records, and the ordinary POSIX-ustar member header block. Member payload
/// bytes and padding remain the caller's responsibility.
///
/// `sequence` is rendered in decimal into the generated `PaxHeaders/<sequence>`
/// name of the extended header, and into the `PaxEntries/<sequence>` fallback
/// name used when the member path cannot be split into ustar name fields.
///
/// # Errors
///
/// Returns a [`FramingWriteError`] when `member` fails validation, when a
/// length computation overflows, or when the extended header payload size
/// cannot be encoded. Validation runs before `buffer` is touched; on any
/// later failure the buffer has already been cleared and may hold partially
/// written framing bytes.
pub fn frame_pax_member_into(
    sequence: u64,
    member: PaxMember<'_>,
    buffer: &mut Vec<u8>,
) -> Result<(), FramingWriteError> {
    validate_member(member)?;

    let mut size_buffer = [0; MAX_DECIMAL_U64_BYTES];
    let size = decimal_u64(member.size, &mut size_buffer);
    let link_path = member.link_path.map(str::as_bytes);
    buffer.clear();
    // Reserve a zeroed block for the extended header; the records are appended
    // directly after it so they are already in their final position.
    buffer.resize(BLOCK_SIZE, 0);
    append_pax_record(buffer, &PaxKeyword::Path, member.path.as_bytes())?;
    append_pax_record(buffer, &PaxKeyword::Size, size)?;
    if let Some(link_path) = link_path {
        append_pax_record(buffer, &PaxKeyword::LinkPath, link_path)?;
    }
    // Unpadded byte length of the records written after the header block.
    let payload_len = buffer.len() - BLOCK_SIZE;
    let payload_size =
        u64::try_from(payload_len).map_err(|_| FramingWriteError::ArithmeticOverflow {
            context: "pax payload length",
        })?;
    let padded_payload_len = padded_payload_len(payload_len)?;

    let mut sequence_name_buffer = [0; MAX_SEQUENCE_NAME_BYTES];
    let extended_name = prefixed_decimal_name(b"PaxHeaders/", sequence, &mut sequence_name_buffer);
    let sequence_name_len = extended_name.len();
    // The ustar size field falls back to zero when the real size does not fit;
    // the pax `size` record appended above always carries the real value.
    let fallback_size = if fits_octal(SIZE_RANGE.len(), member.size) {
        member.size
    } else {
        0
    };
    let (mode, typeflag) = match member.kind {
        UstarKind::Regular => (if member.executable { 0o755 } else { 0o644 }, b'0'),
        UstarKind::Directory => (0o755, b'5'),
        UstarKind::SymbolicLink => (0o777, b'2'),
        // `validate_member` has already rejected every other kind; this arm
        // only keeps the match total.
        _ => {
            return Err(FramingWriteError::UnsupportedMemberKind { kind: member.kind });
        }
    };
    // Extended header block + padded records + member header block.
    let framing_len = padded_payload_len.checked_add(BLOCK_SIZE * 2).ok_or(
        FramingWriteError::ArithmeticOverflow {
            context: "pax framing length",
        },
    )?;
    // Zero-extends the buffer, which supplies both the record padding and a
    // blank member header block.
    buffer.resize(framing_len, 0);
    let (extended_header, rest) = buffer.split_at_mut(BLOCK_SIZE);
    let (_, member_header) = rest.split_at_mut(padded_payload_len);
    build_header_into(
        extended_header,
        (&[], extended_name),
        0o644,
        payload_size,
        b'x',
        b"",
    )?;
    let member_path: (&[u8], &[u8]) =
        if let Some(member_path) = split_ustar_path(member.path.as_bytes()) {
            member_path
        } else {
            // The extended header has copied its name, and both prefixes have the
            // same length, so the sequence-name buffer can become the fallback.
            sequence_name_buffer[..b"PaxEntries/".len()].copy_from_slice(b"PaxEntries/");
            (&[], &sequence_name_buffer[..sequence_name_len])
        };
    // A link target too long for the ustar link-name field is left out of the
    // member header by `build_header_into`; the pax `linkpath` record carries it.
    build_header_into(
        member_header,
        member_path,
        mode,
        fallback_size,
        typeflag,
        member.link_path.unwrap_or_default().as_bytes(),
    )?;
    Ok(())
}
217
218/// Returns the required two-block POSIX end-of-archive marker as contiguous bytes.
219pub fn end_marker_bytes() -> &'static [u8] {
220    &END_MARKER_BYTES
221}
222
223/// Returns the zero padding required after a payload of `size` meaningful bytes.
224#[inline]
225pub fn payload_padding(size: u64) -> &'static [u8] {
226    // `BLOCK_SIZE` is a power of two, so the low bits of the wrapped negation
227    // are the distance to the next block boundary, including zero when aligned.
228    let padding = size.wrapping_neg() & (BLOCK_SIZE as u64 - 1);
229    &ZERO_BLOCK[..padding as usize]
230}
231
232fn validate_member(member: PaxMember<'_>) -> Result<(), FramingWriteError> {
233    validate_text("path", member.path)?;
234    // Defensive: our own decoder rejects non-directories with suffixes that
235    // require directory resolution, so we should never encode one.
236    // TODO: Single-source this check, maybe in name validation?
237    if !matches!(member.kind, UstarKind::Directory)
238        && (member.path.ends_with('/')
239            || member
240                .path
241                .rsplit('/')
242                .next()
243                .is_some_and(|component| matches!(component, "." | "..")))
244    {
245        return Err(FramingWriteError::DirectoryRequiredPathSuffix { kind: member.kind });
246    }
247    match member.kind {
248        UstarKind::Regular | UstarKind::Directory if member.link_path.is_some() => {
249            Err(FramingWriteError::UnexpectedLinkPath { kind: member.kind })
250        }
251        UstarKind::Directory | UstarKind::SymbolicLink if member.size != 0 => {
252            Err(FramingWriteError::InvalidMemberSize {
253                kind: member.kind,
254                size: member.size,
255            })
256        }
257        UstarKind::Regular | UstarKind::Directory => Ok(()),
258        UstarKind::SymbolicLink => validate_text(
259            "linkpath",
260            member.link_path.ok_or(FramingWriteError::MissingLinkPath)?,
261        ),
262        _ => Err(FramingWriteError::UnsupportedMemberKind { kind: member.kind }),
263    }
264}
265
266fn validate_text(field: &'static str, value: &str) -> Result<(), FramingWriteError> {
267    if value.is_empty() || value.contains('\0') {
268        return Err(FramingWriteError::InvalidText { field });
269    }
270    Ok(())
271}
272
273fn record_len(keyword: &PaxKeyword, value: &[u8]) -> Result<usize, FramingWriteError> {
274    let (namespace, name) = keyword.components();
275    let keyword_len = name
276        .map_or(Some(namespace.len()), |name| {
277            namespace
278                .len()
279                .checked_add(1)
280                .and_then(|len| len.checked_add(name.len()))
281        })
282        .ok_or(FramingWriteError::ArithmeticOverflow {
283            context: "pax record keyword length",
284        })?;
285    let suffix_len = keyword_len
286        .checked_add(value.len())
287        .and_then(|len| len.checked_add(3))
288        .ok_or(FramingWriteError::ArithmeticOverflow {
289            context: "pax record length",
290        })?;
291    let tentative_len = (suffix_len.ilog10() as usize + 1)
292        .checked_add(suffix_len)
293        .ok_or(FramingWriteError::ArithmeticOverflow {
294            context: "pax record length",
295        })?;
296    (tentative_len.ilog10() as usize + 1)
297        .checked_add(suffix_len)
298        .ok_or(FramingWriteError::ArithmeticOverflow {
299            context: "pax record length",
300        })
301}
302
303fn append_record_with_len(payload: &mut Vec<u8>, keyword: &PaxKeyword, value: &[u8], len: usize) {
304    append_decimal_usize(payload, len);
305    payload.push(b' ');
306    let (namespace, name) = keyword.components();
307    payload.extend_from_slice(namespace.as_bytes());
308    if let Some(name) = name {
309        payload.push(b'.');
310        payload.extend_from_slice(name.as_bytes());
311    }
312    payload.push(b'=');
313    payload.extend_from_slice(value);
314    payload.push(b'\n');
315}
316
317fn build_header_into(
318    block: &mut [u8],
319    (prefix, name): (&[u8], &[u8]),
320    mode: u64,
321    size: u64,
322    typeflag: u8,
323    link_path: &[u8],
324) -> Result<(), FramingWriteError> {
325    let block: &mut Block =
326        block
327            .try_into()
328            .map_err(|_| FramingWriteError::ArithmeticOverflow {
329                context: "ustar header block length",
330            })?;
331    block[NAME_RANGE.start..NAME_RANGE.start + name.len()].copy_from_slice(name);
332    block[PREFIX_RANGE.start..PREFIX_RANGE.start + prefix.len()].copy_from_slice(prefix);
333    if link_path.len() <= LINK_NAME_RANGE.len() {
334        block[LINK_NAME_RANGE.start..LINK_NAME_RANGE.start + link_path.len()]
335            .copy_from_slice(link_path);
336    }
337    if !encode_octal(&mut block[MODE_RANGE], mode)
338        || !encode_octal(&mut block[UID_RANGE], 0)
339        || !encode_octal(&mut block[GID_RANGE], 0)
340        || !encode_octal(&mut block[SIZE_RANGE], size)
341        || !encode_octal(&mut block[MTIME_RANGE], 0)
342        // pax says that all other fields are "leading zero-filled octal numbers," even
343        // if effectively unused (like devmajor and devminor are). We pedantically follow
344        // this; GNU tar doesn't.
345        || !encode_octal(&mut block[DEVMAJOR_RANGE], 0)
346        || !encode_octal(&mut block[DEVMINOR_RANGE], 0)
347    {
348        return Err(FramingWriteError::ExtendedHeaderTooLarge { size });
349    }
350    block[TYPEFLAG_OFFSET] = typeflag;
351    block[IDENTITY_RANGE].copy_from_slice(USTAR_IDENTITY);
352    encode_checksum(block);
353    Ok(())
354}
355
/// Reports whether `value` can be written in octal into a field of
/// `field_len` bytes while leaving at least one byte of the field unused.
///
/// Zero counts as one digit. The spare byte is presumably the terminator that
/// `encode_octal` writes after the digits — confirm against that helper.
///
/// The comparison is done in `usize` so that a very large `field_len` is never
/// truncated before it is compared.
fn fits_octal(field_len: usize, value: u64) -> bool {
    let digits = value.checked_ilog(8).map_or(1, |log| log + 1);
    // A `u64` has at most 22 octal digits, so this conversion cannot fail on
    // any supported target; a failure is still treated as "does not fit".
    usize::try_from(digits).is_ok_and(|digits| digits < field_len)
}
359
360fn split_ustar_path(path: &[u8]) -> Option<(&[u8], &[u8])> {
361    if path.len() <= NAME_RANGE.len() {
362        return Some((&[], path));
363    }
364    path.iter()
365        .enumerate()
366        .rev()
367        .filter(|(_, byte)| **byte == b'/')
368        .find_map(|(separator, _)| {
369            let prefix = &path[..separator];
370            let name = &path[separator + 1..];
371            if !prefix.is_empty()
372                && prefix.len() <= PREFIX_RANGE.len()
373                && !name.is_empty()
374                && name.len() <= NAME_RANGE.len()
375            {
376                Some((prefix, name))
377            } else {
378                None
379            }
380        })
381}
382
383fn padded_payload_len(len: usize) -> Result<usize, FramingWriteError> {
384    len.checked_next_multiple_of(BLOCK_SIZE)
385        .ok_or(FramingWriteError::ArithmeticOverflow {
386            context: "padded pax payload length",
387        })
388}
389
390fn prefixed_decimal_name<'a>(
391    prefix: &[u8; b"PaxHeaders/".len()],
392    value: u64,
393    buffer: &'a mut [u8; MAX_SEQUENCE_NAME_BYTES],
394) -> &'a [u8] {
395    let mut digits_buffer = [0; MAX_DECIMAL_U64_BYTES];
396    let digits = decimal_u64(value, &mut digits_buffer);
397    let len = prefix.len() + digits.len();
398    buffer[..prefix.len()].copy_from_slice(prefix);
399    buffer[prefix.len()..len].copy_from_slice(digits);
400    &buffer[..len]
401}
402
403/// Writes `value` as right-aligned decimal ASCII and returns the initialized suffix.
404/// Short values stay inline; larger values use [`decimal_u64_large`].
405#[inline]
406fn decimal_u64(value: u64, buffer: &mut [u8; MAX_DECIMAL_U64_BYTES]) -> &[u8] {
407    // TODO: Replace this with `core::fmt::NumBuffer` once our MSRV is 1.98+.
408    // Rust's (more generic) version:
409    // <https://github.com/rust-lang/rust/blob/c1b22f44c3/library/core/src/fmt/num.rs#L190-L253>
410
411    // PAX record lengths and sequence numbers are usually short, so keep their
412    // common path straight-line. Larger values are rendered four digits at a time.
413    if value < 10 {
414        buffer[MAX_DECIMAL_U64_BYTES - 1] = b'0' + value as u8;
415        return &buffer[MAX_DECIMAL_U64_BYTES - 1..];
416    }
417    if value < 100 {
418        let value = value as u8;
419        let tens = value / 10;
420        buffer[MAX_DECIMAL_U64_BYTES - 2] = b'0' + tens;
421        buffer[MAX_DECIMAL_U64_BYTES - 1] = b'0' + value - tens * 10;
422        return &buffer[MAX_DECIMAL_U64_BYTES - 2..];
423    }
424    if value < 1_000 {
425        let value = value as u16;
426        let hundreds = value / 100;
427        let remainder = (value - hundreds * 100) as u8;
428        let tens = remainder / 10;
429        buffer[MAX_DECIMAL_U64_BYTES - 3] = b'0' + hundreds as u8;
430        buffer[MAX_DECIMAL_U64_BYTES - 2] = b'0' + tens;
431        buffer[MAX_DECIMAL_U64_BYTES - 1] = b'0' + remainder - tens * 10;
432        return &buffer[MAX_DECIMAL_U64_BYTES - 3..];
433    }
434
435    decimal_u64_large(value, buffer)
436}
437
438/// Writes larger values in four-digit chunks, leaving the leading group to the tail.
439/// Note: this is **not** inlined so that our fast path ([`decimal_u64`]) can be inlined
440/// without pulling the slower path's code into each caller, which would harm locality.
441#[inline(never)]
442fn decimal_u64_large(value: u64, buffer: &mut [u8; MAX_DECIMAL_U64_BYTES]) -> &[u8] {
443    let mut remaining = value;
444    let mut start = buffer.len();
445
446    // Leave the leading one to four digits for the tail below, avoiding one
447    // final division by 10,000 when the digit count is a multiple of four.
448    while remaining >= 10_000 {
449        start -= 4;
450        let quad = (remaining % 10_000) as usize;
451        remaining /= 10_000;
452        let first_pair = quad / 100 * 2;
453        let second_pair = quad % 100 * 2;
454        buffer[start] = DECIMAL_PAIRS[first_pair];
455        buffer[start + 1] = DECIMAL_PAIRS[first_pair + 1];
456        buffer[start + 2] = DECIMAL_PAIRS[second_pair];
457        buffer[start + 3] = DECIMAL_PAIRS[second_pair + 1];
458    }
459
460    if remaining >= 100 {
461        start -= 2;
462        let pair = remaining as usize % 100 * 2;
463        remaining /= 100;
464        buffer[start] = DECIMAL_PAIRS[pair];
465        buffer[start + 1] = DECIMAL_PAIRS[pair + 1];
466    }
467
468    if remaining >= 10 {
469        start -= 2;
470        let pair = remaining as usize * 2;
471        buffer[start] = DECIMAL_PAIRS[pair];
472        buffer[start + 1] = DECIMAL_PAIRS[pair + 1];
473    } else {
474        start -= 1;
475        buffer[start] = b'0' + remaining as u8;
476    }
477
478    &buffer[start..]
479}
480
481fn append_decimal_usize(output: &mut Vec<u8>, value: usize) {
482    let mut buffer = [0; MAX_DECIMAL_U64_BYTES];
483    output.extend_from_slice(decimal_u64(value as u64, &mut buffer));
484}
485
// Unit tests for the framing writer. Several of them round-trip the produced
// bytes through the crate's own `TarStream` decoder.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use tokio_stream::StreamExt;

    use super::*;
    use crate::{
        PaxKind, PaxRecord, PaxString, PaxValue,
        header::parse_octal,
        stream::{Frame, TarStream},
        test_support::{ChunkedReader, ready},
    };

    /// Positional shorthand for building a [`PaxMember`].
    fn pax_member<'a>(
        path: &'a str,
        kind: UstarKind,
        size: u64,
        link_path: Option<&'a str>,
        executable: bool,
    ) -> PaxMember<'a> {
        PaxMember {
            path,
            kind,
            size,
            link_path,
            executable,
        }
    }

    /// Builds a complete single-member archive: framing, payload, payload
    /// padding, and the end-of-archive marker.
    fn frame_archive(
        sequence: u64,
        member: PaxMember<'_>,
        payload: &[u8],
    ) -> Result<Vec<u8>, FramingWriteError> {
        let mut bytes = Vec::new();
        frame_pax_member_into(sequence, member, &mut bytes)?;
        bytes.extend_from_slice(payload);
        bytes.extend_from_slice(payload_padding(member.size));
        bytes.extend_from_slice(end_marker_bytes());
        Ok(bytes)
    }

    /// Asserts that [`decimal_u64`] agrees with the standard library formatter.
    fn assert_decimal_u64_matches_standard(value: u64) {
        // Pre-fill with a non-digit so stale buffer bytes cannot pass as output.
        let mut buffer = [b'?'; MAX_DECIMAL_U64_BYTES];
        assert_eq!(
            decimal_u64(value, &mut buffer),
            value.to_string().as_bytes(),
            "value: {value}"
        );
    }

    // Each supported member kind decodes back to a local pax frame, a member
    // header with matching kind and size, and a `path` record.
    #[test]
    fn frames_regular_directory_and_symbolic_link_members() {
        let members = [
            pax_member("bin/tool", UstarKind::Regular, 3, None, true),
            pax_member("bin", UstarKind::Directory, 0, None, false),
            pax_member("alias", UstarKind::SymbolicLink, 0, Some("bin/tool"), false),
        ];
        for (sequence, member) in members.into_iter().enumerate() {
            let payload: &[u8] = if member.kind == UstarKind::Regular {
                b"run"
            } else {
                b""
            };
            let bytes = frame_archive(sequence as u64, member, payload).expect("valid member");
            let frames = ready(TarStream::new(ChunkedReader::new(bytes, 19)).collect::<Vec<_>>());
            assert!(matches!(
                &frames[0],
                Ok(Frame::Pax(frame)) if frame.kind == PaxKind::Local
            ));
            let header = frames
                .iter()
                .find_map(|frame| match frame {
                    Ok(Frame::Header(header)) => Some(header),
                    _ => None,
                })
                .expect("member header");
            assert_eq!(header.kind, member.kind);
            assert_eq!(header.effective_size, member.size);
            let records = frames
                .iter()
                .find_map(|frame| match frame {
                    Ok(Frame::Data(data)) => data.completed_pax_records(),
                    _ => None,
                })
                .expect("local pax records");
            assert!(
                records.contains(&PaxRecord::Path(PaxValue::Value(PaxString::Utf8(
                    member.path.to_owned().into()
                ))))
            );
        }
    }

    // Framing into a dirty buffer discards the stale bytes, and a second call
    // reuses the existing allocation.
    #[test]
    fn frames_members_into_a_reusable_buffer() {
        let member = pax_member("bin/tool", UstarKind::Regular, 3, None, true);
        let mut bytes = Vec::with_capacity(BLOCK_SIZE * 3);
        bytes.extend_from_slice(b"stale bytes");
        frame_pax_member_into(7, member, &mut bytes).expect("valid member");
        assert_eq!(bytes.len(), BLOCK_SIZE * 3);
        let capacity = bytes.capacity();

        frame_pax_member_into(8, member, &mut bytes).expect("valid member");
        assert_eq!(bytes.len(), BLOCK_SIZE * 3);
        assert_eq!(bytes.capacity(), capacity);

        // Finish the archive by hand so the decoder can check the result.
        bytes.extend_from_slice(b"run");
        bytes.resize(bytes.len() + BLOCK_SIZE - 3, 0);
        bytes.extend_from_slice(end_marker_bytes());
        let frames = ready(TarStream::new(ChunkedReader::new(bytes, 19)).collect::<Vec<_>>());
        assert!(frames.iter().all(Result::is_ok));
    }

    // Both the extended header and the member header carry parseable octal
    // zeros in devmajor and devminor.
    #[test]
    fn encodes_unused_device_fields_as_octal_zero() {
        let mut bytes = Vec::new();
        frame_pax_member_into(
            0,
            pax_member("file", UstarKind::Regular, 0, None, false),
            &mut bytes,
        )
        .expect("valid member");

        for (kind, header) in [
            ("pax", &bytes[..BLOCK_SIZE]),
            ("member", &bytes[bytes.len() - BLOCK_SIZE..]),
        ] {
            assert_eq!(parse_octal(&header[DEVMAJOR_RANGE]), Some(0), "{kind}");
            assert_eq!(parse_octal(&header[DEVMINOR_RANGE]), Some(0), "{kind}");
        }
    }

    // Padding is checked at aligned sizes, unaligned sizes, and the top of the
    // u64 range, where the wrapping negation matters.
    #[test]
    fn returns_payload_padding_and_contiguous_end_marker_bytes() {
        for (size, expected) in [
            (0, &[] as &[u8]),
            (BLOCK_SIZE as u64, &[]),
            (1, &[0; BLOCK_SIZE - 1]),
            ((BLOCK_SIZE + 7) as u64, &[0; BLOCK_SIZE - 7]),
            (u64::MAX - (BLOCK_SIZE as u64 - 1), &[]),
            (u64::MAX - (BLOCK_SIZE as u64 - 2), &[0; BLOCK_SIZE - 1]),
            (u64::MAX, &[0; 1]),
        ] {
            assert_eq!(payload_padding(size), expected, "{size}");
        }

        assert_eq!(end_marker_bytes().len(), BLOCK_SIZE * 2);
        assert!(end_marker_bytes().iter().all(|byte| *byte == 0));
    }

    // The second record's length prefix grows from one digit to two, which
    // exercises the self-referential length computation.
    #[test]
    fn appends_standalone_pax_records_across_decimal_boundaries() {
        let mut record = Vec::new();
        assert_eq!(
            append_pax_record(&mut record, &PaxKeyword::Path, b"b"),
            Ok(())
        );
        assert_eq!(record, b"9 path=b\n");
        record.clear();
        assert_eq!(
            append_pax_record(&mut record, &PaxKeyword::Atime, b"x"),
            Ok(())
        );
        assert_eq!(record, b"11 atime=x\n");
        // Keywords with an empty part or an embedded `=` are rejected.
        for keyword in [
            PaxKeyword::Realtime(Arc::from("")),
            PaxKeyword::Vendor {
                vendor: Arc::from("invalid=vendor"),
                name: Arc::from("attribute"),
            },
        ] {
            assert_eq!(
                append_pax_record(&mut Vec::new(), &keyword, b"value"),
                Err(FramingWriteError::InvalidPaxRecordKeyword)
            );
        }
    }

    // Spot checks around every branch boundary of the decimal formatter.
    #[test]
    fn formats_u64_values_across_decimal_boundaries() {
        for (value, expected) in [
            (0, "0"),
            (9, "9"),
            (10, "10"),
            (99, "99"),
            (100, "100"),
            (999, "999"),
            (1_000, "1000"),
            (1_001, "1001"),
            (9_999, "9999"),
            (10_000, "10000"),
            (1_000_001, "1000001"),
            (u64::MAX, "18446744073709551615"),
        ] {
            let mut buffer = [0; MAX_DECIMAL_U64_BYTES];
            assert_eq!(decimal_u64(value, &mut buffer), expected.as_bytes());
        }
    }

    // Broad comparison against `to_string`: a dense low range, every width
    // transition, and a pseudo-random sample of the full domain.
    #[test]
    fn matches_standard_formatting_across_the_u64_range() {
        // Exhaust the short-value paths and every possible four-digit suffix.
        for value in 0..1_000_000 {
            assert_decimal_u64_matches_standard(value);
        }

        // Exercise both sides of every remaining decimal-width transition.
        for exponent in 6..=19 {
            let power = 10_u64.pow(exponent);
            for distance in 0..=9 {
                if let Some(value) = power.checked_sub(distance) {
                    assert_decimal_u64_matches_standard(value);
                }
                if distance != 0
                    && let Some(value) = power.checked_add(distance)
                {
                    assert_decimal_u64_matches_standard(value);
                }
            }
        }

        // This full-period LCG samples the full u64 domain reproducibly.
        let mut value = 0x4d59_5df4_d0f3_3173;
        for _ in 0..250_000 {
            assert_decimal_u64_matches_standard(value);
            value = value
                .wrapping_mul(6_364_136_223_846_793_005)
                .wrapping_add(1_442_695_040_888_963_407);
        }
    }

    // A path that cannot be split into ustar fields gets the generated
    // `PaxEntries/<sequence>` name, and an over-long link target leaves the
    // ustar link-name field zeroed.
    #[test]
    fn uses_generated_fallbacks_for_long_paths_and_links() {
        let path = format!("{}/{}", "a".repeat(156), "b".repeat(101));
        let link_path = "c".repeat(101);
        let member = pax_member(&path, UstarKind::SymbolicLink, 0, Some(&link_path), false);
        let mut bytes = Vec::new();
        frame_pax_member_into(7, member, &mut bytes).expect("valid member");
        let member_header = &bytes[bytes.len() - BLOCK_SIZE..];
        assert_eq!(
            &member_header[NAME_RANGE.start..NAME_RANGE.start + 12],
            b"PaxEntries/7"
        );
        assert!(member_header[LINK_NAME_RANGE].iter().all(|byte| *byte == 0));

        bytes.extend_from_slice(end_marker_bytes());
        let frames = ready(TarStream::new(ChunkedReader::new(bytes, 23)).collect::<Vec<_>>());
        let records = frames
            .iter()
            .find_map(|frame| match frame {
                Ok(Frame::Data(data)) => data.completed_pax_records(),
                _ => None,
            })
            .expect("local pax records");
        // The path, size, and linkpath records.
        assert_eq!(records.len(), 3);
    }

    // Validation failures surface as the matching error variant.
    #[test]
    fn rejects_unsupported_or_inconsistent_members() {
        for (member, expected) in [
            (
                pax_member("file", UstarKind::HardLink, 0, None, false),
                FramingWriteError::UnsupportedMemberKind {
                    kind: UstarKind::HardLink,
                },
            ),
            (
                pax_member("link", UstarKind::SymbolicLink, 1, Some("file"), false),
                FramingWriteError::InvalidMemberSize {
                    kind: UstarKind::SymbolicLink,
                    size: 1,
                },
            ),
        ] {
            assert_eq!(
                frame_pax_member_into(0, member, &mut Vec::new()),
                Err(expected)
            );
        }
    }

    // A size too large for the ustar field is written as zero there; framing
    // still succeeds.
    #[test]
    fn uses_zero_ustar_fallback_for_oversized_regular_payloads() {
        let mut bytes = Vec::new();
        frame_pax_member_into(
            0,
            pax_member("large", UstarKind::Regular, u64::MAX, None, false),
            &mut bytes,
        )
        .expect("pax size can represent u64 values");
        assert_eq!(
            parse_octal(&bytes[bytes.len() - BLOCK_SIZE..][SIZE_RANGE]),
            Some(0)
        );
    }
}
779            parse_octal(&bytes[bytes.len() - BLOCK_SIZE..][SIZE_RANGE]),
780            Some(0)
781        );
782    }
783}