tar_no_std/
header.rs

1/*
2MIT License
3
4Copyright (c) 2025 Philipp Schuster
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24//! TAR header definition taken from <https://www.gnu.org/software/tar/manual/html_node/Standard.html>.
//! A Tar archive is a collection of 512-byte sized blocks. Unfortunately, there are several
//! TAR-like archive specifications. An overview can be found here:
//! <https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>
28//!
29//! This library focuses on extracting files from the GNU Tar format.
30
31#![allow(non_upper_case_globals)]
32
33use crate::{BLOCKSIZE, NAME_LEN, PREFIX_LEN, TarFormatDecimal, TarFormatOctal, TarFormatString};
34use core::fmt::{Debug, Display, Formatter};
35use core::num::ParseIntError;
36
/// Errors that may happen when parsing the [`ModeFlags`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ModeError {
    /// The mode field could not be parsed as a number.
    ParseInt(ParseIntError),
    /// The mode field is a number, but [`ModeFlags::from_bits`] rejected it.
    IllegalMode,
}

impl Display for ModeError {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        // The underlying parse error is exposed via `source()` and therefore
        // deliberately not repeated in the message.
        match self {
            Self::ParseInt(_) => f.write_str("the mode field is not a valid number"),
            Self::IllegalMode => f.write_str("the mode field contains invalid mode flags"),
        }
    }
}

impl core::error::Error for ModeError {
    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
        match self {
            Self::ParseInt(err) => Some(err),
            Self::IllegalMode => None,
        }
    }
}
43
44/// Wrapper around the UNIX file permissions given in octal ASCII.
45#[derive(Copy, Clone, PartialEq, Eq)]
46#[repr(transparent)]
47pub struct Mode(TarFormatOctal<8>);
48
49impl Mode {
50    /// Parses the [`ModeFlags`] from the mode string.
51    ///
52    /// # Errors
53    /// Returns [`ModeError`] for invalid values.
54    pub fn to_flags(self) -> Result<ModeFlags, ModeError> {
55        let bits = self.0.as_number::<u64>().map_err(ModeError::ParseInt)?;
56        ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode)
57    }
58}
59
60impl Debug for Mode {
61    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
62        Debug::fmt(&self.to_flags(), f)
63    }
64}
65
/// Error returned when a type-flag byte does not correspond to any
/// [`TypeFlag`] variant. Carries the offending byte.
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
pub struct InvalidTypeFlagError(u8);

impl Display for InvalidTypeFlagError {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        // The offending byte is printed in lowercase hex without a prefix.
        let Self(byte) = self;
        write!(f, "{:x} is not a valid TypeFlag", byte)
    }
}

// No `source()`: this error does not wrap another error.
impl core::error::Error for InvalidTypeFlagError {}
76
/// Raw, not yet validated type-flag byte. Use
/// [`TypeFlagRaw::try_to_type_flag`] to interpret it as a [`TypeFlag`].
#[derive(Clone, Copy, Eq, PartialEq, PartialOrd)]
pub struct TypeFlagRaw(u8);
79
80impl TypeFlagRaw {
81    /// Tries to parse the underlying value as [`TypeFlag`]. This fails if the
82    /// Tar file is corrupt and the type is invalid.
83    ///
84    /// # Errors
85    /// Returns [`InvalidTypeFlagError`] for invalid values.
86    pub fn try_to_type_flag(self) -> Result<TypeFlag, InvalidTypeFlagError> {
87        TypeFlag::try_from(self)
88    }
89}
90
91impl Debug for TypeFlagRaw {
92    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
93        Debug::fmt(&self.try_to_type_flag(), f)
94    }
95}
96
/// Kind of archive entry announced by a [`PosixHeader`], i.e., what sort of
/// payload follows the header. The properties of that payload are described
/// inside the header itself.
///
/// Each discriminant is the ASCII byte that represents the variant.
#[allow(unused)]
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TypeFlag {
    /// Regular file. New archives should be created using this value. For
    /// backward compatibility, tar treats a regular file whose name ends with
    /// a slash as a directory.
    REGTYPE = b'0',
    /// Regular file, legacy value. To stay compatible with older versions of
    /// tar, it should be silently recognized as a regular file, just like
    /// `REGTYPE`.
    AREGTYPE = b'\0',
    /// A file linked to another file, of any type, that was previously
    /// archived. In Unix, such files are identified by having the same device
    /// and inode number. The linked-to name is stored in the linkname field
    /// with a trailing null.
    LINK = b'1',
    /// Symbolic link to another file. The linked-to name is stored in the
    /// linkname field with a trailing null.
    SYMTYPE = b'2',
    /// Character special file. The devmajor and devminor fields contain the
    /// major and minor device numbers. Operating systems may map the device
    /// specification to their own local specification, or may ignore the
    /// entry.
    CHRTYPE = b'3',
    /// Block special file. The devmajor and devminor fields contain the major
    /// and minor device numbers. Operating systems may map the device
    /// specification to their own local specification, or may ignore the
    /// entry.
    BLKTYPE = b'4',
    /// Directory or sub-directory. The directory name in the name field
    /// should end with a slash. On systems where disk allocation is performed
    /// on a directory basis, the size field contains the maximum number of
    /// bytes (possibly rounded to the nearest disk block allocation unit) the
    /// directory may hold; a size of zero indicates no such limit. Systems
    /// that do not support limiting in this manner should ignore the size
    /// field.
    DIRTYPE = b'5',
    /// FIFO special file. Archiving a FIFO archives the existence of the file,
    /// not its contents.
    FIFOTYPE = b'6',
    /// Contiguous file: the same as a normal file, except that operating
    /// systems which support it allocate all its space contiguously on disk.
    /// Operating systems without contiguous allocation should silently treat
    /// this type as a normal file.
    CONTTYPE = b'7',
    /// Extended header referring to the next file in the archive.
    XHDTYPE = b'x',
    /// Global extended header.
    XGLTYPE = b'g',
}
151
152impl TypeFlag {
153    /// Whether we have a regular file.
154    #[must_use]
155    pub fn is_regular_file(self) -> bool {
156        // Equivalent. See spec.
157        self == Self::AREGTYPE || self == Self::REGTYPE
158    }
159}
160
161impl TryFrom<TypeFlagRaw> for TypeFlag {
162    type Error = InvalidTypeFlagError;
163
164    fn try_from(value: TypeFlagRaw) -> Result<Self, Self::Error> {
165        match value.0 {
166            b'0' => Ok(Self::REGTYPE),
167            b'\0' => Ok(Self::AREGTYPE),
168            b'1' => Ok(Self::LINK),
169            b'2' => Ok(Self::SYMTYPE),
170            b'3' => Ok(Self::CHRTYPE),
171            b'4' => Ok(Self::BLKTYPE),
172            b'5' => Ok(Self::DIRTYPE),
173            b'6' => Ok(Self::FIFOTYPE),
174            b'7' => Ok(Self::CONTTYPE),
175            b'x' => Ok(Self::XHDTYPE),
176            b'g' => Ok(Self::XGLTYPE),
177            e => Err(InvalidTypeFlagError(e)),
178        }
179    }
180}
181
182bitflags::bitflags! {
183    /// UNIX file permissions in octal format.
184    #[repr(transparent)]
185    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
186    pub struct ModeFlags: u64 {
187        /// Set UID on execution.
188        const SetUID = 0o4000;
189        /// Set GID on execution.
190        const SetGID = 0o2000;
191        /// Reserved.
192        const TSVTX = 0o1000;
193        /// Owner read.
194        const OwnerRead = 0o400;
195        /// Owner write.
196        const OwnerWrite = 0o200;
197        /// Owner execute.
198        const OwnerExec = 0o100;
199        /// Group read.
200        const GroupRead = 0o040;
201        /// Group write.
202        const GroupWrite = 0o020;
203        /// Group execute.
204        const GroupExec = 0o010;
205        /// Others read.
206        const OthersRead = 0o004;
207        /// Others read.
208        const OthersWrite = 0o002;
209        /// Others execute.
210        const OthersExec = 0o001;
211    }
212}
213
214/// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990).
215///
216/// "New" GNU Tar versions use this archive format by default.
217/// (<https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>).
218///
219/// Each file is started by such a header, that describes the size and
220/// the file name. After that, the file content stands in chunks of 512 bytes.
221/// The number of bytes can be derived from the file size.
222///
223/// This is also mostly compatible with the "Ustar"-header and the "GNU format".
224/// Because this library mainly targets the filename, the data, and basic
225/// metadata, we don't need advanced checks for specific extensions.
226#[derive(Debug, Copy, Clone, PartialEq, Eq)]
227#[repr(C, packed)]
228pub struct PosixHeader {
229    pub name: TarFormatString<NAME_LEN>,
230    pub mode: Mode,
231    pub uid: TarFormatOctal<8>,
232    pub gid: TarFormatOctal<8>,
233    // confusing; size is stored as ASCII string
234    pub size: TarFormatOctal<12>,
235    pub mtime: TarFormatDecimal<12>,
236    pub cksum: TarFormatOctal<8>,
237    pub typeflag: TypeFlagRaw,
238    /// Name. There is always a null byte, therefore
239    /// the max len is 99.
240    pub linkname: TarFormatString<NAME_LEN>,
241    pub magic: TarFormatString<6>,
242    pub version: TarFormatString<2>,
243    /// Username. There is always a null byte, therefore
244    /// the max len is N-1.
245    pub uname: TarFormatString<32>,
246    /// Groupname. There is always a null byte, therefore
247    /// the max len is N-1.
248    pub gname: TarFormatString<32>,
249    pub dev_major: TarFormatOctal<8>,
250    pub dev_minor: TarFormatOctal<8>,
251    pub prefix: TarFormatString<PREFIX_LEN>,
252    // padding => to BLOCKSIZE bytes
253    pub _pad: [u8; 12],
254}
255
256impl PosixHeader {
257    /// Returns the number of blocks that are required to read the whole file
258    /// content. Returns an error, if the file size can't be parsed from the
259    /// header.
260    ///
261    /// # Errors
262    /// Returns a [`ParseIntError`] error if the size can't be parsed.
263    pub fn payload_block_count(&self) -> Result<usize, ParseIntError> {
264        let parsed_size = self.size.as_number::<usize>()?;
265        Ok(parsed_size.div_ceil(BLOCKSIZE))
266    }
267
268    /// A Tar archive is terminated, if an end-of-archive entry, which consists
269    /// of two 512 blocks of zero bytes, is found.
270    #[must_use]
271    pub fn is_zero_block(&self) -> bool {
272        let ptr = core::ptr::addr_of!(*self);
273        let ptr = ptr.cast::<u8>();
274
275        let self_bytes = unsafe { core::slice::from_raw_parts(ptr, BLOCKSIZE) };
276        self_bytes.iter().filter(|x| **x == 0).count() == BLOCKSIZE
277    }
278}
279
#[cfg(test)]
mod tests {
    use crate::BLOCKSIZE;
    use crate::header::{PosixHeader, TypeFlag};
    use std::mem::size_of;

    /// Name of the first entry in each of the test archives.
    const FIRST_ENTRY_NAME: &str = "bye_world_513b.txt";

    /// Returns the PosixHeader at the beginning of the Tar archive.
    ///
    /// # Panics
    /// Panics if the data is too short to contain a header.
    fn bytes_to_archive(tar_archive_data: &[u8]) -> &PosixHeader {
        assert!(
            tar_archive_data.len() >= size_of::<PosixHeader>(),
            "archive data is too short to contain a tar header"
        );
        // SAFETY: The length check above guarantees that a full header can be
        // read. `PosixHeader` is `repr(C, packed)` and thus has an alignment
        // of 1, so every byte pointer is sufficiently aligned. The returned
        // reference borrows from `tar_archive_data` via lifetime elision.
        // NOTE(review): this assumes every byte pattern is a valid
        // `PosixHeader` (all fields being plain byte wrappers) — confirm.
        unsafe { &*tar_archive_data.as_ptr().cast::<PosixHeader>() }
    }

    #[test]
    fn test_display_header() {
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar"));
        assert_eq!(archive.name.as_str(), Ok(FIRST_ENTRY_NAME));
        println!("{archive:#?}");
    }

    #[test]
    fn test_payload_block_count() {
        // first file is "bye_world_513b.txt" => we expect two data blocks
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar"));
        assert_eq!(archive.payload_block_count(), Ok(2));
    }

    #[test]
    fn test_show_tar_header_magics() {
        let archives: [(&str, &[u8]); 7] = [
            (
                "default",
                include_bytes!("../tests/gnu_tar_default.tar").as_slice(),
            ),
            ("gnu", include_bytes!("../tests/gnu_tar_gnu.tar").as_slice()),
            (
                "oldgnu",
                include_bytes!("../tests/gnu_tar_oldgnu.tar").as_slice(),
            ),
            ("pax", include_bytes!("../tests/gnu_tar_pax.tar").as_slice()),
            (
                "posix",
                include_bytes!("../tests/gnu_tar_posix.tar").as_slice(),
            ),
            (
                "ustar",
                include_bytes!("../tests/gnu_tar_ustar.tar").as_slice(),
            ),
            ("v7", include_bytes!("../tests/gnu_tar_v7.tar").as_slice()),
        ];

        for (label, bytes) in archives {
            let archive = bytes_to_archive(bytes);
            println!(
                "{label}: magic='{:?}', version='{:?}'",
                archive.magic, archive.version
            );
        }
    }

    #[test]
    fn test_parse_tar_header_filename() {
        // Not covered yet: `gnu_tar_pax.tar` and `gnu_tar_posix.tar`. They use
        // extensions (the posix archive uses the unsupported XHDTYPE
        // extension).
        let cases: [(&str, &[u8], TypeFlag); 5] = [
            (
                "default",
                include_bytes!("../tests/gnu_tar_default.tar").as_slice(),
                TypeFlag::REGTYPE,
            ),
            (
                "gnu",
                include_bytes!("../tests/gnu_tar_gnu.tar").as_slice(),
                TypeFlag::REGTYPE,
            ),
            (
                "oldgnu",
                include_bytes!("../tests/gnu_tar_oldgnu.tar").as_slice(),
                TypeFlag::REGTYPE,
            ),
            (
                "ustar",
                include_bytes!("../tests/gnu_tar_ustar.tar").as_slice(),
                TypeFlag::REGTYPE,
            ),
            // ARegType: legacy
            (
                "v7",
                include_bytes!("../tests/gnu_tar_v7.tar").as_slice(),
                TypeFlag::AREGTYPE,
            ),
        ];

        for (label, bytes, expected_flag) in cases {
            let archive = bytes_to_archive(bytes);
            assert_eq!(
                archive.typeflag.try_to_type_flag(),
                Ok(expected_flag),
                "{label}: the first entry is a regular file!"
            );
            assert_eq!(
                archive.name.as_str(),
                Ok(FIRST_ENTRY_NAME),
                "{label}: unexpected name of the first entry"
            );
        }
    }

    #[test]
    fn test_size() {
        assert_eq!(BLOCKSIZE, size_of::<PosixHeader>());
    }
}