tar_no_std/
header.rs

1/*
2MIT License
3
4Copyright (c) 2025 Philipp Schuster
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24//! TAR header definition taken from <https://www.gnu.org/software/tar/manual/html_node/Standard.html>.
25//! A Tar-archive is a collection of 512-byte sized blocks. Unfortunately there are several
26//! TAR-like archive specifications. An Overview can be found here:
27//! <https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>
28//!
29//! This library focuses on extracting files from the GNU Tar format.
30
31#![allow(non_upper_case_globals)]
32
33use crate::{BLOCKSIZE, NAME_LEN, PREFIX_LEN, TarFormatDecimal, TarFormatOctal, TarFormatString};
34use core::error::Error;
35use core::fmt::{Debug, Display, Formatter};
36use core::num::ParseIntError;
37
/// Failure modes of [`Mode::to_flags`], i.e. of decoding the mode field of a
/// header into [`ModeFlags`].
#[derive(Debug)]
pub enum ModeError {
    /// The mode field could not be parsed as a number.
    ParseInt(ParseIntError),
    /// The field parsed as a number, but [`ModeFlags::from_bits`] rejected
    /// the value.
    IllegalMode,
}
44
45impl Display for ModeError {
46    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
47        Debug::fmt(self, f)
48    }
49}
50
51impl Error for ModeError {
52    fn source(&self) -> Option<&(dyn Error + 'static)> {
53        match self {
54            Self::ParseInt(e) => Some(e),
55            Self::IllegalMode => None,
56        }
57    }
58}
59
60/// Wrapper around the UNIX file permissions given in octal ASCII.
61#[derive(Copy, Clone, PartialEq, Eq)]
62#[repr(transparent)]
63pub struct Mode(TarFormatOctal<8>);
64
65impl Mode {
66    /// Parses the [`ModeFlags`] from the mode string.
67    ///
68    /// # Errors
69    /// Returns [`ModeError`] for invalid values.
70    pub fn to_flags(self) -> Result<ModeFlags, ModeError> {
71        let bits = self.0.as_number::<u64>().map_err(ModeError::ParseInt)?;
72        ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode)
73    }
74}
75
76impl Debug for Mode {
77    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
78        Debug::fmt(&self.to_flags(), f)
79    }
80}
81
/// Error for a type-flag byte that does not correspond to any [`TypeFlag`].
/// Carries the offending raw byte.
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
pub struct InvalidTypeFlagError(u8);
84
85impl Display for InvalidTypeFlagError {
86    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
87        f.write_fmt(format_args!("{:x} is not a valid TypeFlag", self.0))
88    }
89}
90
91impl core::error::Error for InvalidTypeFlagError {}
92
/// The type-flag byte exactly as stored in the header. It may hold a value
/// that is not a valid [`TypeFlag`]; use [`TypeFlagRaw::try_to_type_flag`]
/// to validate it.
#[derive(Clone, Copy, Eq, PartialEq, PartialOrd)]
pub struct TypeFlagRaw(u8);
95
96impl TypeFlagRaw {
97    /// Tries to parse the underlying value as [`TypeFlag`]. This fails if the
98    /// Tar file is corrupt and the type is invalid.
99    ///
100    /// # Errors
101    /// Returns [`InvalidTypeFlagError`] for invalid values.
102    pub fn try_to_type_flag(self) -> Result<TypeFlag, InvalidTypeFlagError> {
103        TypeFlag::try_from(self)
104    }
105}
106
107impl Debug for TypeFlagRaw {
108    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
109        Debug::fmt(&self.try_to_type_flag(), f)
110    }
111}
112
/// Kind of payload that follows a [`PosixHeader`]. The properties of that
/// payload are described inside the header itself.
///
/// Each variant's discriminant is the byte stored in the header's type-flag
/// field.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
#[allow(unused)]
pub enum TypeFlag {
    /// Regular file. New archives should be created with this flag. For
    /// backward compatibility, tar treats a regular file whose name ends with
    /// a slash as a directory.
    REGTYPE = b'0',
    /// Regular file, as written by older versions of tar. To stay compatible
    /// with them, this value should be silently recognized as a regular file,
    /// i.e. treated like [`TypeFlag::REGTYPE`].
    AREGTYPE = b'\0',
    /// File linked to another, previously archived file of any type. In Unix
    /// such files are identified by having the same device and inode number.
    /// The linked-to name is stored in the linkname field with a trailing
    /// null.
    LINK = b'1',
    /// Symbolic link to another file. The linked-to name is stored in the
    /// linkname field with a trailing null.
    SYMTYPE = b'2',
    /// Character special file. The devmajor and devminor fields contain the
    /// major and minor device numbers. Operating systems may map the device
    /// specification to their own local specification, or may ignore the
    /// entry.
    CHRTYPE = b'3',
    /// Block special file. The devmajor and devminor fields contain the major
    /// and minor device numbers. Operating systems may map the device
    /// specification to their own local specification, or may ignore the
    /// entry.
    BLKTYPE = b'4',
    /// Directory or sub-directory. The name in the name field should end with
    /// a slash. On systems where disk allocation is performed on a directory
    /// basis, the size field contains the maximum number of bytes (possibly
    /// rounded to the nearest disk block allocation unit) the directory may
    /// hold; a size of zero means no such limit. Systems that do not support
    /// this kind of limiting should ignore the size field.
    DIRTYPE = b'5',
    /// FIFO special file. Archiving a FIFO records the existence of the file,
    /// not its contents.
    FIFOTYPE = b'6',
    /// Contiguous file: like a normal file, except that operating systems
    /// which support it allocate all its space contiguously on disk.
    /// Operating systems without contiguous allocation should silently treat
    /// it as a normal file.
    CONTTYPE = b'7',
    /// Extended header referring to the next file in the archive.
    XHDTYPE = b'x',
    /// Global extended header.
    XGLTYPE = b'g',
}
167
168impl TypeFlag {
169    /// Whether we have a regular file.
170    #[must_use]
171    pub fn is_regular_file(self) -> bool {
172        // Equivalent. See spec.
173        self == Self::AREGTYPE || self == Self::REGTYPE
174    }
175}
176
177impl TryFrom<TypeFlagRaw> for TypeFlag {
178    type Error = InvalidTypeFlagError;
179
180    fn try_from(value: TypeFlagRaw) -> Result<Self, Self::Error> {
181        match value.0 {
182            b'0' => Ok(Self::REGTYPE),
183            b'\0' => Ok(Self::AREGTYPE),
184            b'1' => Ok(Self::LINK),
185            b'2' => Ok(Self::SYMTYPE),
186            b'3' => Ok(Self::CHRTYPE),
187            b'4' => Ok(Self::BLKTYPE),
188            b'5' => Ok(Self::DIRTYPE),
189            b'6' => Ok(Self::FIFOTYPE),
190            b'7' => Ok(Self::CONTTYPE),
191            b'x' => Ok(Self::XHDTYPE),
192            b'g' => Ok(Self::XGLTYPE),
193            e => Err(InvalidTypeFlagError(e)),
194        }
195    }
196}
197
198bitflags::bitflags! {
199    /// UNIX file permissions in octal format.
200    #[repr(transparent)]
201    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
202    pub struct ModeFlags: u64 {
203        /// Set UID on execution.
204        const SetUID = 0o4000;
205        /// Set GID on execution.
206        const SetGID = 0o2000;
207        /// Reserved.
208        const TSVTX = 0o1000;
209        /// Owner read.
210        const OwnerRead = 0o400;
211        /// Owner write.
212        const OwnerWrite = 0o200;
213        /// Owner execute.
214        const OwnerExec = 0o100;
215        /// Group read.
216        const GroupRead = 0o040;
217        /// Group write.
218        const GroupWrite = 0o020;
219        /// Group execute.
220        const GroupExec = 0o010;
221        /// Others read.
222        const OthersRead = 0o004;
223        /// Others read.
224        const OthersWrite = 0o002;
225        /// Others execute.
226        const OthersExec = 0o001;
227    }
228}
229
230/// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990).
231///
232/// "New" GNU Tar versions use this archive format by default.
233/// (<https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>).
234///
235/// Each file is started by such a header, that describes the size and
236/// the file name. After that, the file content stands in chunks of 512 bytes.
237/// The number of bytes can be derived from the file size.
238///
239/// This is also mostly compatible with the "Ustar"-header and the "GNU format".
240/// Because this library mainly targets the filename, the data, and basic
241/// metadata, we don't need advanced checks for specific extensions.
242#[derive(Debug, Copy, Clone, PartialEq, Eq)]
243#[repr(C, packed)]
244pub struct PosixHeader {
245    pub name: TarFormatString<NAME_LEN>,
246    pub mode: Mode,
247    pub uid: TarFormatOctal<8>,
248    pub gid: TarFormatOctal<8>,
249    // confusing; size is stored as ASCII string
250    pub size: TarFormatOctal<12>,
251    pub mtime: TarFormatDecimal<12>,
252    pub cksum: TarFormatOctal<8>,
253    pub typeflag: TypeFlagRaw,
254    /// Name. There is always a null byte, therefore
255    /// the max len is 99.
256    pub linkname: TarFormatString<NAME_LEN>,
257    pub magic: TarFormatString<6>,
258    pub version: TarFormatString<2>,
259    /// Username. There is always a null byte, therefore
260    /// the max len is N-1.
261    pub uname: TarFormatString<32>,
262    /// Groupname. There is always a null byte, therefore
263    /// the max len is N-1.
264    pub gname: TarFormatString<32>,
265    pub dev_major: TarFormatOctal<8>,
266    pub dev_minor: TarFormatOctal<8>,
267    pub prefix: TarFormatString<PREFIX_LEN>,
268    // padding => to BLOCKSIZE bytes
269    pub _pad: [u8; 12],
270}
271
272impl PosixHeader {
273    /// Returns the number of blocks that are required to read the whole file
274    /// content. Returns an error, if the file size can't be parsed from the
275    /// header.
276    ///
277    /// # Errors
278    /// Returns a [`ParseIntError`] error if the size can't be parsed.
279    pub fn payload_block_count(&self) -> Result<usize, ParseIntError> {
280        let parsed_size = self.size.as_number::<usize>()?;
281        Ok(parsed_size.div_ceil(BLOCKSIZE))
282    }
283
284    /// A Tar archive is terminated, if an end-of-archive entry, which consists
285    /// of two 512 blocks of zero bytes, is found.
286    #[must_use]
287    pub fn is_zero_block(&self) -> bool {
288        let ptr = core::ptr::addr_of!(*self);
289        let ptr = ptr.cast::<u8>();
290
291        let self_bytes = unsafe { core::slice::from_raw_parts(ptr, BLOCKSIZE) };
292        self_bytes.iter().filter(|x| **x == 0).count() == BLOCKSIZE
293    }
294}
295
#[cfg(test)]
mod tests {
    use crate::BLOCKSIZE;
    use crate::header::{PosixHeader, TypeFlag};
    use std::mem::size_of;

    /// Name expected for the first entry of the archives checked below.
    const FIRST_ENTRY_NAME: &str = "bye_world_513b.txt";

    /// Returns the [`PosixHeader`] at the beginning of the Tar archive.
    ///
    /// Panics if the data is too short to contain a full header.
    fn bytes_to_archive(tar_archive_data: &[u8]) -> &PosixHeader {
        assert!(
            tar_archive_data.len() >= size_of::<PosixHeader>(),
            "archive data is too short to contain a header"
        );
        // SAFETY: the slice holds at least `size_of::<PosixHeader>()` bytes
        // (checked above) and the returned reference borrows from it.
        // `PosixHeader` is `repr(C, packed)`, so any address is sufficiently
        // aligned. This assumes the `TarFormat*` field types are plain byte
        // wrappers for which every bit pattern is valid.
        unsafe { &*tar_archive_data.as_ptr().cast::<PosixHeader>() }
    }

    #[test]
    fn test_display_header() {
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar"));
        assert_eq!(archive.name.as_str(), Ok(FIRST_ENTRY_NAME));
        println!("{archive:#?}");
    }

    #[test]
    fn test_payload_block_count() {
        // first file is "bye_world_513b.txt" => we expect two data blocks
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar"));
        assert_eq!(archive.payload_block_count(), Ok(2));
    }

    #[test]
    fn test_show_tar_header_magics() {
        let archives: [(&str, &[u8]); 7] = [
            (
                "default",
                include_bytes!("../tests/gnu_tar_default.tar").as_slice(),
            ),
            ("gnu", include_bytes!("../tests/gnu_tar_gnu.tar").as_slice()),
            (
                "oldgnu",
                include_bytes!("../tests/gnu_tar_oldgnu.tar").as_slice(),
            ),
            ("pax", include_bytes!("../tests/gnu_tar_pax.tar").as_slice()),
            (
                "posix",
                include_bytes!("../tests/gnu_tar_posix.tar").as_slice(),
            ),
            (
                "ustar",
                include_bytes!("../tests/gnu_tar_ustar.tar").as_slice(),
            ),
            ("v7", include_bytes!("../tests/gnu_tar_v7.tar").as_slice()),
        ];

        for (label, bytes) in archives {
            let archive = bytes_to_archive(bytes);
            println!(
                "{label}: magic='{:?}', version='{:?}'",
                archive.magic, archive.version
            );
        }
    }

    #[test]
    fn test_parse_tar_header_filename() {
        // The pax and posix archives are not covered here: they are not
        // supported yet because they use extensions (XHDTYPE extended header).
        let archives: [(&str, &[u8], TypeFlag); 5] = [
            (
                "default",
                include_bytes!("../tests/gnu_tar_default.tar").as_slice(),
                TypeFlag::REGTYPE,
            ),
            (
                "gnu",
                include_bytes!("../tests/gnu_tar_gnu.tar").as_slice(),
                TypeFlag::REGTYPE,
            ),
            (
                "oldgnu",
                include_bytes!("../tests/gnu_tar_oldgnu.tar").as_slice(),
                TypeFlag::REGTYPE,
            ),
            (
                "ustar",
                include_bytes!("../tests/gnu_tar_ustar.tar").as_slice(),
                TypeFlag::REGTYPE,
            ),
            // ARegType: legacy
            (
                "v7",
                include_bytes!("../tests/gnu_tar_v7.tar").as_slice(),
                TypeFlag::AREGTYPE,
            ),
        ];

        for (label, bytes, expected_type) in archives {
            let archive = bytes_to_archive(bytes);
            assert_eq!(
                archive.typeflag.try_to_type_flag(),
                Ok(expected_type),
                "{label}: the first entry is a regular file!"
            );
            assert_eq!(
                archive.name.as_str(),
                Ok(FIRST_ENTRY_NAME),
                "{label}: unexpected name of the first entry"
            );
        }
    }

    #[test]
    fn test_size() {
        assert_eq!(BLOCKSIZE, size_of::<PosixHeader>());
    }
}