tar_no_std/
header.rs

1/*
2MIT License
3
4Copyright (c) 2023 Philipp Schuster
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24//! TAR header definition taken from <https://www.gnu.org/software/tar/manual/html_node/Standard.html>.
//! A Tar archive is a collection of 512-byte blocks. Unfortunately, there are several
//! TAR-like archive specifications. An overview can be found here:
27//! <https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>
28//!
29//! This library focuses on extracting files from the GNU Tar format.
30
31#![allow(non_upper_case_globals)]
32
33use crate::{TarFormatDecimal, TarFormatOctal, TarFormatString, BLOCKSIZE, NAME_LEN, PREFIX_LEN};
34use core::fmt::{Debug, Display, Formatter};
35use core::num::ParseIntError;
36
/// Errors that may happen when parsing the [`ModeFlags`].
#[derive(Debug)]
pub enum ModeError {
    /// The raw mode field could not be parsed as a number.
    ParseInt(ParseIntError),
    /// The mode field is a number, but [`ModeFlags::from_bits`] rejected it.
    IllegalMode,
}

impl Display for ModeError {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::ParseInt(err) => write!(f, "mode is not a valid number: {}", err),
            Self::IllegalMode => f.write_str("mode contains unknown permission bits"),
        }
    }
}

// Gated like the `Error` impl of `InvalidTypeFlagError` in this module.
#[cfg(feature = "unstable")]
impl core::error::Error for ModeError {
    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
        match self {
            Self::ParseInt(err) => Some(err),
            Self::IllegalMode => None,
        }
    }
}
43
/// Wrapper around the UNIX file permissions given in octal ASCII.
///
/// The field is kept in its raw on-disk representation. Use
/// [`Mode::to_flags`] to parse it into [`ModeFlags`].
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Mode(TarFormatOctal<8>);
48
49impl Mode {
50    /// Parses the [`ModeFlags`] from the mode string.
51    pub fn to_flags(self) -> Result<ModeFlags, ModeError> {
52        let bits = self.0.as_number::<u64>().map_err(ModeError::ParseInt)?;
53        ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode)
54    }
55}
56
57impl Debug for Mode {
58    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
59        Debug::fmt(&self.to_flags(), f)
60    }
61}
62
/// Error for a type flag byte that does not map to any [`TypeFlag`] variant.
///
/// Carries the offending raw byte.
#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Eq)]
pub struct InvalidTypeFlagError(u8);

impl Display for InvalidTypeFlagError {
    /// Writes the rejected byte in lowercase hexadecimal, followed by a short
    /// explanation.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let Self(raw) = self;
        write!(f, "{:x} is not a valid TypeFlag", raw)
    }
}

// Only compiled when the crate's `unstable` feature is enabled.
#[cfg(feature = "unstable")]
impl core::error::Error for InvalidTypeFlagError {}
74
/// Raw type flag byte as it is stored in the header.
///
/// The byte is not validated. Use [`TypeFlagRaw::try_to_type_flag`] to
/// convert it into a [`TypeFlag`].
#[derive(Copy, Clone, PartialOrd, PartialEq, Eq)]
pub struct TypeFlagRaw(u8);
77
78impl TypeFlagRaw {
79    /// Tries to parse the underlying value as [`TypeFlag`]. This fails if the
80    /// Tar file is corrupt and the type is invalid.
81    pub fn try_to_type_flag(self) -> Result<TypeFlag, InvalidTypeFlagError> {
82        TypeFlag::try_from(self)
83    }
84}
85
86impl Debug for TypeFlagRaw {
87    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
88        Debug::fmt(&self.try_to_type_flag(), f)
89    }
90}
91
/// Describes the kind of payload, that follows after a
/// [`PosixHeader`]. The properties of this payload are
/// described inside the header.
///
/// The discriminant of each variant is the byte that represents it in the
/// header's type flag field.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
#[allow(unused)]
pub enum TypeFlag {
    /// Regular file. New archives should be created using this flag rather
    /// than [`TypeFlag::AREGTYPE`]. For backward compatibility, tar treats a
    /// regular file whose name ends with a slash as a directory.
    REGTYPE = b'0',
    /// Regular file, legacy flag (NUL byte). In order to be compatible with
    /// older versions of tar, this value should be silently recognized as a
    /// regular file, i.e. treated like [`TypeFlag::REGTYPE`].
    AREGTYPE = b'\0',
    /// This flag represents a file linked to another file, of any type, previously archived. Such
    /// files are identified in Unix by each file having the same device and inode number. The
    /// linked-to name is specified in the linkname field with a trailing null.
    LINK = b'1',
    /// This represents a symbolic link to another file. The linked-to name is specified in the
    /// linkname field with a trailing null.
    SYMTYPE = b'2',
    /// Character special file. The devmajor and devminor fields will contain the major and
    /// minor device numbers respectively. Operating systems may map the device specifications
    /// to their own local specification, or may ignore the entry.
    CHRTYPE = b'3',
    /// Block special file. The devmajor and devminor fields will contain the major and
    /// minor device numbers respectively. Operating systems may map the device specifications
    /// to their own local specification, or may ignore the entry.
    BLKTYPE = b'4',
    /// This flag specifies a directory or sub-directory. The directory name in the name field
    /// should end with a slash. On systems where disk allocation is performed on a directory
    /// basis, the size field will contain the maximum number of bytes (which may be rounded to
    /// the nearest disk block allocation unit) which the directory may hold. A size field of zero
    /// indicates no such limiting. Systems which do not support limiting in this manner should
    /// ignore the size field.
    DIRTYPE = b'5',
    /// This specifies a FIFO special file. Note that the archiving of a FIFO file archives the
    /// existence of this file and not its contents.
    FIFOTYPE = b'6',
    /// This specifies a contiguous file, which is the same as a normal file except that, in
    /// operating systems which support it, all its space is allocated contiguously on the disk.
    /// Operating systems which do not allow contiguous allocation should silently treat this type
    /// as a normal file.
    CONTTYPE = b'7',
    /// Extended header referring to the next file in the archive.
    XHDTYPE = b'x',
    /// Global extended header.
    XGLTYPE = b'g',
}
146
147impl TypeFlag {
148    /// Whether we have a regular file.
149    #[must_use]
150    pub fn is_regular_file(self) -> bool {
151        // Equivalent. See spec.
152        self == Self::AREGTYPE || self == Self::REGTYPE
153    }
154}
155
156impl TryFrom<TypeFlagRaw> for TypeFlag {
157    type Error = InvalidTypeFlagError;
158
159    fn try_from(value: TypeFlagRaw) -> Result<Self, Self::Error> {
160        match value.0 {
161            b'0' => Ok(Self::REGTYPE),
162            b'\0' => Ok(Self::AREGTYPE),
163            b'1' => Ok(Self::LINK),
164            b'2' => Ok(Self::SYMTYPE),
165            b'3' => Ok(Self::CHRTYPE),
166            b'4' => Ok(Self::BLKTYPE),
167            b'5' => Ok(Self::DIRTYPE),
168            b'6' => Ok(Self::FIFOTYPE),
169            b'7' => Ok(Self::CONTTYPE),
170            b'x' => Ok(Self::XHDTYPE),
171            b'g' => Ok(Self::XGLTYPE),
172            e => Err(InvalidTypeFlagError(e)),
173        }
174    }
175}
176
bitflags::bitflags! {
    /// UNIX file permissions in octal format.
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct ModeFlags: u64 {
        /// Set UID on execution.
        const SetUID = 0o4000;
        /// Set GID on execution.
        const SetGID = 0o2000;
        /// Reserved.
        const TSVTX = 0o1000;
        /// Owner read.
        const OwnerRead = 0o400;
        /// Owner write.
        const OwnerWrite = 0o200;
        /// Owner execute.
        const OwnerExec = 0o100;
        /// Group read.
        const GroupRead = 0o040;
        /// Group write.
        const GroupWrite = 0o020;
        /// Group execute.
        const GroupExec = 0o010;
        /// Others read.
        const OthersRead = 0o004;
        /// Others write.
        const OthersWrite = 0o002;
        /// Others execute.
        const OthersExec = 0o001;
    }
}
208
/// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990).
///
/// "New" GNU Tar versions use this archive format by default.
/// (<https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>).
///
/// Each file is started by such a header, that describes the size and
/// the file name. After that, the file content stands in chunks of 512 bytes.
/// The number of bytes can be derived from the file size.
///
/// This is also mostly compatible with the "Ustar"-header and the "GNU format".
/// Because this library mainly targets the filename, the data, and basic
/// metadata, we don't need advanced checks for specific extensions.
///
/// The struct is `repr(C, packed)`, so the fields are laid out in declaration
/// order without padding between them. The unit test `test_size` checks that
/// the whole struct is exactly `BLOCKSIZE` bytes large.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C, packed)]
pub struct PosixHeader {
    /// Name of the entry.
    pub name: TarFormatString<NAME_LEN>,
    /// File permissions; see [`Mode`].
    pub mode: Mode,
    /// Numeric user ID field (octal ASCII).
    pub uid: TarFormatOctal<8>,
    /// Numeric group ID field (octal ASCII).
    pub gid: TarFormatOctal<8>,
    /// Size of the payload in bytes. Confusingly, the size is stored as an
    /// ASCII string and not as a binary integer; it is parsed in
    /// [`PosixHeader::payload_block_count`].
    pub size: TarFormatOctal<12>,
    /// Modification time field.
    ///
    /// NOTE(review): the tar format stores `mtime` as octal ASCII like the
    /// other numeric fields, but this field is typed as decimal. Confirm
    /// that `TarFormatDecimal` is intended here.
    pub mtime: TarFormatDecimal<12>,
    /// Header checksum field (octal ASCII).
    pub cksum: TarFormatOctal<8>,
    /// Raw type flag byte; see [`TypeFlagRaw`] and [`TypeFlag`].
    pub typeflag: TypeFlagRaw,
    /// Name. There is always a null byte, therefore
    /// the max len is 99.
    pub linkname: TarFormatString<NAME_LEN>,
    /// Magic string of the header format.
    pub magic: TarFormatString<6>,
    /// Version string of the header format.
    pub version: TarFormatString<2>,
    /// Username. There is always a null byte, therefore
    /// the max len is N-1.
    pub uname: TarFormatString<32>,
    /// Groupname. There is always a null byte, therefore
    /// the max len is N-1.
    pub gname: TarFormatString<32>,
    /// Major device number field (octal ASCII); see [`TypeFlag::CHRTYPE`]
    /// and [`TypeFlag::BLKTYPE`].
    pub dev_major: TarFormatOctal<8>,
    /// Minor device number field (octal ASCII); see [`TypeFlag::CHRTYPE`]
    /// and [`TypeFlag::BLKTYPE`].
    pub dev_minor: TarFormatOctal<8>,
    /// Prefix field of the header.
    pub prefix: TarFormatString<PREFIX_LEN>,
    // padding => to BLOCKSIZE bytes
    pub _pad: [u8; 12],
}
250
251impl PosixHeader {
252    /// Returns the number of blocks that are required to read the whole file
253    /// content. Returns an error, if the file size can't be parsed from the
254    /// header.
255    pub fn payload_block_count(&self) -> Result<usize, ParseIntError> {
256        let parsed_size = self.size.as_number::<usize>()?;
257        Ok(parsed_size.div_ceil(BLOCKSIZE))
258    }
259
260    /// A Tar archive is terminated, if an end-of-archive entry, which consists
261    /// of two 512 blocks of zero bytes, is found.
262    #[must_use]
263    pub fn is_zero_block(&self) -> bool {
264        let ptr = self as *const Self as *const u8;
265        let self_bytes = unsafe { core::slice::from_raw_parts(ptr, BLOCKSIZE) };
266        self_bytes.iter().filter(|x| **x == 0).count() == BLOCKSIZE
267    }
268}
269
#[cfg(test)]
mod tests {
    use crate::header::{PosixHeader, TypeFlag};
    use crate::BLOCKSIZE;
    use std::mem::size_of;

    /// Name of the first entry that the tests expect in the test archives.
    const FIRST_ENTRY_NAME: &str = "bye_world_513b.txt";

    /// Returns the PosixHeader at the beginning of the Tar archive.
    ///
    /// Panics if `tar_archive_data` is too short to contain a whole header.
    fn bytes_to_archive(tar_archive_data: &[u8]) -> &PosixHeader {
        assert!(
            tar_archive_data.len() >= size_of::<PosixHeader>(),
            "archive is too small to contain a header"
        );
        let ptr = tar_archive_data.as_ptr().cast::<PosixHeader>();
        // SAFETY: The length check above guarantees that
        // `size_of::<PosixHeader>()` bytes are readable behind `ptr`, and the
        // returned reference borrows from the input slice. `PosixHeader` is
        // `repr(C, packed)`, so it has no alignment requirement. This assumes
        // that every field type accepts arbitrary byte values.
        unsafe { &*ptr }
    }

    /// Asserts the type flag and the name of the first entry of an archive.
    fn assert_first_entry(tar_archive_data: &[u8], expected_type: TypeFlag) {
        let archive = bytes_to_archive(tar_archive_data);
        assert_eq!(
            archive.typeflag.try_to_type_flag(),
            Ok(expected_type),
            "the first entry is a regular file!"
        );
        assert_eq!(archive.name.as_str(), Ok(FIRST_ENTRY_NAME));
    }

    /// Prints the magic and version of the first header of an archive.
    fn print_header_magic(label: &str, tar_archive_data: &[u8]) {
        let archive = bytes_to_archive(tar_archive_data);
        println!(
            "{}: magic='{:?}', version='{:?}'",
            label, archive.magic, archive.version
        );
    }

    #[test]
    fn test_display_header() {
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar"));
        assert_eq!(archive.name.as_str(), Ok(FIRST_ENTRY_NAME));
        println!("{:#?}", archive);
    }

    #[test]
    fn test_payload_block_count() {
        // first file is "bye_world_513b.txt" => we expect two data blocks
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar"));
        assert_eq!(archive.payload_block_count(), Ok(2));
    }

    #[test]
    fn test_show_tar_header_magics() {
        print_header_magic("default", include_bytes!("../tests/gnu_tar_default.tar"));
        print_header_magic("gnu", include_bytes!("../tests/gnu_tar_gnu.tar"));
        print_header_magic("oldgnu", include_bytes!("../tests/gnu_tar_oldgnu.tar"));
        print_header_magic("pax", include_bytes!("../tests/gnu_tar_pax.tar"));
        print_header_magic("posix", include_bytes!("../tests/gnu_tar_posix.tar"));
        print_header_magic("ustar", include_bytes!("../tests/gnu_tar_ustar.tar"));
        print_header_magic("v7", include_bytes!("../tests/gnu_tar_v7.tar"));
    }

    #[test]
    fn test_parse_tar_header_filename() {
        assert_first_entry(
            include_bytes!("../tests/gnu_tar_default.tar"),
            TypeFlag::REGTYPE,
        );
        assert_first_entry(
            include_bytes!("../tests/gnu_tar_gnu.tar"),
            TypeFlag::REGTYPE,
        );
        assert_first_entry(
            include_bytes!("../tests/gnu_tar_oldgnu.tar"),
            TypeFlag::REGTYPE,
        );

        // UNSUPPORTED YET: "../tests/gnu_tar_pax.tar" and
        // "../tests/gnu_tar_posix.tar" use extensions (the posix archive
        // uses the XHDTYPE extension), so they are not checked here.

        assert_first_entry(
            include_bytes!("../tests/gnu_tar_ustar.tar"),
            TypeFlag::REGTYPE,
        );

        // ARegType: legacy
        assert_first_entry(
            include_bytes!("../tests/gnu_tar_v7.tar"),
            TypeFlag::AREGTYPE,
        );
    }

    #[test]
    fn test_size() {
        assert_eq!(BLOCKSIZE, size_of::<PosixHeader>());
    }
}