Skip to main content

unarc_rs/tar/
mod.rs

1//! TAR archive format support
2//!
3//! TAR (Tape Archive) is a file format used for storing multiple files in a single archive.
4//! It was originally developed for tape backup systems but is now commonly used for
5//! software distribution and file archiving, often in combination with compression
6//! formats like gzip (.tar.gz) or bzip2 (.tar.bz2).
7//!
8//! This module provides read-only access to TAR archives using the `tar` crate.
9
10use std::io::{Read, Seek, SeekFrom};
11
12use crate::date_time::DosDateTime;
13use crate::error::{ArchiveError, Result};
14
15/// TAR file header information
16#[derive(Debug, Clone)]
17pub struct TarFileHeader {
18    /// File name (may include path)
19    pub name: String,
20    /// File size in bytes
21    pub size: u64,
22    /// Modification time as Unix timestamp
23    pub mtime: u64,
24    /// File mode/permissions
25    pub mode: u32,
26    /// Entry type (file, directory, symlink, etc.)
27    pub entry_type: TarEntryType,
28    /// Link name for symlinks/hardlinks
29    pub link_name: Option<String>,
30}
31
/// Kinds of entries that can appear in a TAR archive.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TarEntryType {
    /// An ordinary file
    Regular,
    /// A hard link to another entry
    HardLink,
    /// A symbolic link
    Symlink,
    /// A character device node
    Char,
    /// A block device node
    Block,
    /// A directory
    Directory,
    /// A FIFO, i.e. a named pipe
    Fifo,
    /// A continuous file (GNU extension)
    Continuous,
    /// Any other type; carries the raw type byte
    Other(u8),
}
54
55impl From<tar::EntryType> for TarEntryType {
56    fn from(t: tar::EntryType) -> Self {
57        match t {
58            tar::EntryType::Regular => TarEntryType::Regular,
59            tar::EntryType::Link => TarEntryType::HardLink,
60            tar::EntryType::Symlink => TarEntryType::Symlink,
61            tar::EntryType::Char => TarEntryType::Char,
62            tar::EntryType::Block => TarEntryType::Block,
63            tar::EntryType::Directory => TarEntryType::Directory,
64            tar::EntryType::Fifo => TarEntryType::Fifo,
65            tar::EntryType::Continuous => TarEntryType::Continuous,
66            _ => TarEntryType::Other(t.as_byte()),
67        }
68    }
69}
70
71impl TarFileHeader {
72    /// Convert Unix timestamp to DOS datetime
73    pub fn modified_time(&self) -> Option<DosDateTime> {
74        // Convert Unix timestamp to DOS datetime
75        // Unix epoch is 1970-01-01, DOS epoch is 1980-01-01
76        if self.mtime == 0 {
77            return None;
78        }
79
80        // Use chrono to convert
81        use chrono::{Datelike, TimeZone, Timelike, Utc};
82        if let Some(dt) = Utc.timestamp_opt(self.mtime as i64, 0).single() {
83            let year = dt.year() as u16;
84            let month = dt.month() as u16;
85            let day = dt.day() as u16;
86            let hour = dt.hour() as u16;
87            let minute = dt.minute() as u16;
88            let second = dt.second() as u16;
89
90            if year >= 1980 {
91                // DOS datetime format: date in high 16 bits, time in low 16 bits
92                let dos_date = ((year - 1980) << 9) | (month << 5) | day;
93                let dos_time = (hour << 11) | (minute << 5) | (second / 2);
94                // Combine into u32: date in high word, time in low word
95                let combined = ((dos_date as u32) << 16) | (dos_time as u32);
96                return Some(DosDateTime::new(combined));
97            }
98        }
99        None
100    }
101}
102
103/// Internal entry tracking for TAR archives
104struct TarEntry {
105    header: TarFileHeader,
106    /// Offset in the archive where the file data starts
107    data_offset: u64,
108}
109
110/// TAR archive reader
111pub struct TarArchive<T: Read + Seek> {
112    reader: T,
113    /// List of all entries (pre-scanned)
114    entries: Vec<TarEntry>,
115    /// Current entry index
116    current_index: usize,
117}
118
119impl<T: Read + Seek> TarArchive<T> {
120    /// Create a new TAR archive reader
121    pub fn new(mut reader: T) -> Result<Self> {
122        // Pre-scan all entries to allow random access
123        let mut entries = Vec::new();
124
125        // Reset to start
126        reader.seek(SeekFrom::Start(0))?;
127
128        {
129            let mut archive = tar::Archive::new(&mut reader);
130
131            for entry_result in archive
132                .entries()
133                .map_err(|e| ArchiveError::io_error(format!("Failed to read TAR entries: {}", e)))?
134            {
135                let entry = entry_result.map_err(|e| ArchiveError::io_error(format!("Failed to read TAR entry: {}", e)))?;
136
137                let header = entry.header();
138                let name = entry
139                    .path()
140                    .map_err(|e| ArchiveError::io_error(format!("Failed to read entry path: {}", e)))?
141                    .to_string_lossy()
142                    .to_string();
143
144                let size = header.size().unwrap_or(0);
145                let mtime = header.mtime().unwrap_or(0);
146                let mode = header.mode().unwrap_or(0);
147                let entry_type = header.entry_type().into();
148                let link_name = header.link_name().ok().flatten().map(|p| p.to_string_lossy().to_string());
149
150                let raw_header_position = entry.raw_header_position();
151                // Data starts after the 512-byte header
152                let data_offset = raw_header_position + 512;
153
154                entries.push(TarEntry {
155                    header: TarFileHeader {
156                        name,
157                        size,
158                        mtime,
159                        mode,
160                        entry_type,
161                        link_name,
162                    },
163                    data_offset,
164                });
165            }
166        }
167
168        // Reset reader position
169        reader.seek(SeekFrom::Start(0))?;
170
171        Ok(Self {
172            reader,
173            entries,
174            current_index: 0,
175        })
176    }
177
178    /// Get the next entry in the archive
179    pub fn get_next_entry(&mut self) -> Result<Option<TarFileHeader>> {
180        if self.current_index >= self.entries.len() {
181            return Ok(None);
182        }
183
184        let entry = &self.entries[self.current_index];
185        Ok(Some(entry.header.clone()))
186    }
187
188    /// Skip the current entry
189    pub fn skip(&mut self, _header: &TarFileHeader) -> Result<()> {
190        if self.current_index < self.entries.len() {
191            self.current_index += 1;
192        }
193        Ok(())
194    }
195
196    /// Read the contents of the current entry
197    pub fn read(&mut self, header: &TarFileHeader) -> Result<Vec<u8>> {
198        // Find the entry by name (in case entries were iterated out of order)
199        let entry = self
200            .entries
201            .iter()
202            .find(|e| e.header.name == header.name)
203            .ok_or_else(|| ArchiveError::io_error(format!("Entry not found: {}", header.name)))?;
204
205        // Seek to data position
206        self.reader.seek(SeekFrom::Start(entry.data_offset))?;
207
208        // Read the data
209        let mut data = vec![0u8; entry.header.size as usize];
210        self.reader.read_exact(&mut data)?;
211
212        // Advance to next entry
213        self.current_index += 1;
214
215        Ok(data)
216    }
217
218    /// Get the total number of entries
219    pub fn entry_count(&self) -> usize {
220        self.entries.len()
221    }
222}