Skip to main content

tar_framing/
header.rs

1use crate::{ArchiveFormat, Block};
2
/// Member name field: bytes `0..100` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const NAME_RANGE: std::ops::Range<usize> = 0..100;

/// Member mode field: bytes `100..108` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const MODE_RANGE: std::ops::Range<usize> = 100..108;

/// Member user ID field: bytes `108..116` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const UID_RANGE: std::ops::Range<usize> = 108..116;

/// Member group ID field: bytes `116..124` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const GID_RANGE: std::ops::Range<usize> = 116..124;

/// Member size field: bytes `124..136` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const SIZE_RANGE: std::ops::Range<usize> = 124..136;

/// Member mtime field: bytes `136..148` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const MTIME_RANGE: std::ops::Range<usize> = 136..148;

/// Header checksum field: bytes `148..156` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const CHECKSUM_RANGE: std::ops::Range<usize> = 148..156;

/// Typeflag: the single byte at index `156` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const TYPEFLAG_OFFSET: usize = 156;

/// Link name (i.e. link target) field: bytes `157..257` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers.
pub const LINK_NAME_RANGE: std::ops::Range<usize> = 157..257;

/// Identity field: bytes `257..265` of a tar header block.
///
/// The position is shared by pax/ustar and GNU headers. The range covers
/// both the tar magic and the tar version.
pub const IDENTITY_RANGE: std::ops::Range<usize> = 257..265;
44
/// Member user name field: bytes `265..297` of a pax/ustar header block.
pub const UNAME_RANGE: std::ops::Range<usize> = 265..297;

/// Member group name field: bytes `297..329` of a pax/ustar header block.
pub const GNAME_RANGE: std::ops::Range<usize> = 297..329;

/// Member device-major field: bytes `329..337` of a pax/ustar header block.
pub const DEVMAJOR_RANGE: std::ops::Range<usize> = 329..337;

/// Member device-minor field: bytes `337..345` of a pax/ustar header block.
pub const DEVMINOR_RANGE: std::ops::Range<usize> = 337..345;
56
/// The byte range that a member's name prefix appears at in a tar block
/// (bytes `345..500`).
///
/// This is relevant only for pax/ustar; GNU blocks do not include a prefix.
pub const PREFIX_RANGE: std::ops::Range<usize> = 345..500;
61
/// Identity bytes (magic followed by version) of a ustar header block:
/// `ustar`, a NUL, then the two ASCII digits `00`.
///
/// ustar is the baseline for pax: every pax block is a well-formed ustar
/// block, and what makes it pax is whether it uses a pax typeflag.
pub const USTAR_IDENTITY: &[u8; 8] = b"ustar\x0000";

/// Identity bytes (magic followed by version) of a GNU tar header block:
/// `ustar`, two spaces, then a NUL.
///
/// Although this is not the proper ustar identity, a GNU tar block uses a
/// strict superset of the ustar typeflags.
pub const GNU_IDENTITY: &[u8; 8] = b"ustar  \0";
72
/// Largest checksum a tar header block (pax, ustar, or GNU) can produce.
///
/// A header block is exactly 512 bytes, so a naive byte sum tops out at
/// `255 * 512 = 130,560`. The 8-byte checksum field is counted as ASCII
/// spaces (value 32) while summing, though, so only the other 504 bytes can
/// contribute 255 each: `(504 * 255) + (8 * 32) = 128,776`.
const MAX_CHECKSUM: u64 = (512 - 8) * 255 + 8 * 32;
// Compile-time proof that every possible checksum fits in six octal digits.
const _: () = assert!(MAX_CHECKSUM < 0o777777);
80
81// Pax framing constructs two headers per member. Keeping this fixed-size
82// reduction inline lets LLVM lower each call to a compact vectorized sum.
83#[inline(always)]
84pub(crate) fn checksum(block: &Block) -> u64 {
85    // A block's maximum byte sum fits in u32, which gives LLVM a compact
86    // vectorized reduction.
87    let block_sum = block.iter().map(|byte| u32::from(*byte)).sum::<u32>();
88    let checksum_sum = block[CHECKSUM_RANGE]
89        .iter()
90        .map(|byte| u32::from(*byte))
91        .sum::<u32>();
92    u64::from(block_sum - checksum_sum + CHECKSUM_RANGE.len() as u32 * u32::from(b' '))
93}
94
95#[inline(always)]
96pub(crate) fn encode_checksum(block: &mut Block) {
97    let value = checksum(block);
98
99    // Observe:
100    // 1. We know statically that our computed checksum is no more than MAX_CHECKSUM
101    // 2. We know that MAX_CHECKSUM is less than 0o777777 (262143)
102    //
103    // Therefore, we know that all possible checksums fit within 6 octal digits,
104    // and therefore we can always safely include two padding bytes.
105    //
106    // NOTE: the use of `\0 ` as the suffix is not specified by pax, but appears
107    // to be a convention across tar encoders.
108    debug_assert!(value <= MAX_CHECKSUM);
109    let _ = encode_octal_with_suffix(&mut block[CHECKSUM_RANGE], value, b"\0 ");
110}
111
112pub(crate) fn encode_octal(field: &mut [u8], value: u64) -> bool {
113    encode_octal_with_suffix(field, value, b"\0")
114}
115
116fn encode_octal_with_suffix(field: &mut [u8], value: u64, suffix: &[u8]) -> bool {
117    let Some(width) = field.len().checked_sub(suffix.len()) else {
118        return false;
119    };
120    if width == 0 {
121        return false;
122    }
123    field[width..].copy_from_slice(suffix);
124    encode_octal_digits(&mut field[..width], value)
125}
126
/// Fill all of `field` with the zero-padded ASCII octal digits of `value`,
/// least-significant digit last.
///
/// Returns `true` when `value` was fully represented. When it returns
/// `false`, `field` holds only the low-order digits.
fn encode_octal_digits(field: &mut [u8], value: u64) -> bool {
    // Walk the field right-to-left, peeling off one octal digit per byte.
    let leftover = field.iter_mut().rev().fold(value, |remaining, slot| {
        *slot = b'0' + (remaining % 8) as u8;
        remaining / 8
    });
    // Any bits still left did not fit in the field.
    leftover == 0
}
134
/// Parse a strict octal field.
///
/// Per pax, an octal number is a leading-zero filled run of octal characters
/// (`0`-`7`) terminated by one or more NUL or space characters. Accordingly
/// this returns `None` when:
///
/// - there are no digits before the first terminator,
/// - there is no terminator at all,
/// - any byte is neither an octal digit, NUL, nor space,
/// - anything other than NUL or space follows the first terminator, or
/// - the value does not fit in a `u64`.
pub(crate) fn parse_octal(bytes: &[u8]) -> Option<u64> {
    let is_terminator = |byte: &u8| matches!(*byte, 0 | b' ');

    // Everything before the first terminator must be digits, and everything
    // from it onward must be terminators.
    let first_terminator = bytes.iter().position(is_terminator)?;
    let (digits, padding) = bytes.split_at(first_terminator);
    if digits.is_empty() || !padding.iter().all(is_terminator) {
        return None;
    }

    digits.iter().try_fold(0_u64, |value, &digit| match digit {
        b'0'..=b'7' => value.checked_mul(8)?.checked_add(u64::from(digit - b'0')),
        _ => None,
    })
}
155
/// Report whether every byte in `bytes` is NUL.
///
/// An empty slice counts as all-NUL.
pub(crate) fn is_all_nul(bytes: &[u8]) -> bool {
    !bytes.iter().any(|&byte| byte != 0)
}
159
160/// Parse a number from the given bytes, depending on the archive format.
161///
162/// See [`parse_octal`] for the pax parsing rules and [`parse_gnu_number`]
163/// for the GNU parsing rules.
164pub(crate) fn parse_number(format: ArchiveFormat, bytes: &[u8]) -> Option<u64> {
165    match format {
166        ArchiveFormat::Pax => parse_octal(bytes),
167        ArchiveFormat::Gnu => parse_gnu_number(bytes),
168    }
169}
170
171/// Parse a number according to the GNU tar rules.
172///
173/// This implements a subset of the GNU rules: negative numbers are rejected entirely,
174/// and we don't reject base256 encodings that *would* fit in the octal encoding.
175/// TODO: Consider rejecting these? The GNU spec describes base256 encodings that would
176/// fit in octal as "reserved for future use."
177fn parse_gnu_number(bytes: &[u8]) -> Option<u64> {
178    match bytes.first()? {
179        0x80 => bytes[1..].iter().try_fold(0_u64, |value, byte| {
180            value.checked_mul(256)?.checked_add(u64::from(*byte))
181        }),
182        // Negative encoding; reject for now. This would also be rejected by
183        // `parse_octal` but here is clearer.
184        0xff => None,
185        _ => parse_octal(bytes),
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// `encode_octal` writes zero-padded digits plus a NUL terminator and
    /// reports whether the value fit; fields with no room for a digit fail.
    #[test]
    fn encodes_octal_values_that_fit_the_field() {
        let mut field = [0xff; 4];
        assert!(encode_octal(&mut field, 0o17));
        assert_eq!(&field, b"017\0");
        assert_eq!(parse_octal(&field), Some(0o17));

        assert!(encode_octal(&mut field, 0o777));
        assert_eq!(&field, b"777\0");
        // One digit too many for a 3-digit field.
        assert!(!encode_octal(&mut field, 0o1000));
        // No room for the terminator, or for any digit before it.
        assert!(!encode_octal(&mut [], 0));
        assert!(!encode_octal(&mut [0], 0));
    }

    /// `parse_octal` accepts only `digits+ terminators+` and rejects
    /// everything else, including values that overflow `u64`.
    #[test]
    fn parses_strict_octal_fields() {
        for (field, expected) in [
            // OK: leading zeroes.
            (&b"000017 "[..], Some(0o17)),
            (&b"0000000000000000000000000000017 "[..], Some(0o17)),
            // OK: 0o17, null terminated.
            (&b"17\0"[..], Some(0o17)),
            // OK: 0o17, space terminated.
            (&b"17 "[..], Some(0o17)),
            // OK: 0o17, space terminated (trailing null ignored)
            (&b"17 \0"[..], Some(0o17)),
            // Invalid: empty
            (&b""[..], None),
            // Invalid: terminator only
            (&b"\0"[..], None),
            (&b" "[..], None),
            // Invalid: no terminator
            (&b"17"[..], None),
            // Invalid: not in octal domain
            (&b"18\0"[..], None),
            // Invalid: not in octal domain, even after terminator.
            // 0x38 is ASCII '8'; 0x32 would be '2', which *is* an octal digit
            // and would merely duplicate the "octal after terminator" case.
            (&b"1\0\x38"[..], None),
            // Invalid: octal after terminator.
            (&b"1\0\x31"[..], None),
            (&b"1 1"[..], None),
            // Invalid: not in octal domain.
            (&[0x80, 0][..], None),
            // Invalid: overflows u64.
            (&b"77777777777777777777777 "[..], None),
            (&b"77777777777777777777777\0"[..], None),
        ] {
            assert_eq!(parse_octal(field), expected, "{field:?}");
        }
    }

    /// `checksum` treats the checksum field as spaces whatever it contains,
    /// and `encode_checksum` writes six octal digits followed by `\0 `.
    #[test]
    fn checksums_known_blocks() {
        let zero_block = [0; crate::BLOCK_SIZE];
        let mut x_typeflag_block = zero_block;
        x_typeflag_block[TYPEFLAG_OFFSET] = b'x';
        // Junk in the checksum field must not influence the result.
        x_typeflag_block[CHECKSUM_RANGE].fill(0xff);
        let maximum_block = [0xff; crate::BLOCK_SIZE];

        for (name, mut block, expected) in [
            // 8 spaces: 8 * 32 = 0o400.
            ("zero block", zero_block, b"000400\0 "),
            (
                // 8 spaces plus b'x' (120): 376 = 0o570.
                "x typeflag with junk checksum bytes",
                x_typeflag_block,
                b"000570\0 ",
            ),
            // MAX_CHECKSUM: 128,776 = 0o373410.
            ("maximum block", maximum_block, b"373410\0 "),
        ] {
            assert_eq!(Some(checksum(&block)), parse_octal(expected), "{name}");
            encode_checksum(&mut block);
            assert_eq!(&block[CHECKSUM_RANGE], expected, "{name}");
        }
    }
}