/// Decoded fields of a single 512-byte tar header block.
///
/// The byte ranges quoted on each field are the ones `parse_tar_header` reads
/// from and `create_tar_header` writes to. Numeric fields are stored in the
/// block as octal text.
#[derive(Debug)]
pub struct TarHeader {
/// Entry name (header bytes 0..100); at most 100 bytes are serialized.
pub name: String,
/// Value of the mode field (bytes 100..108, octal text).
pub mode: u32,
/// Value of the uid field (bytes 108..116, octal text).
pub uid: u32,
/// Value of the gid field (bytes 116..124, octal text).
pub gid: u32,
/// Payload length in bytes (bytes 124..136, octal text). `read_tar` uses it
/// to locate the entry data that follows the header block.
pub size: u64,
/// Value of the mtime field (bytes 136..148, octal text); not interpreted
/// anywhere in this file.
pub mtime: u64,
/// Checksum as parsed from bytes 148..156. `TarHeader::new` sets it to 0 and
/// `create_tar_header` ignores it, recomputing the checksum from the
/// serialized block instead.
pub checksum: u32,
/// Entry type byte (header byte 156). `TarHeader::new` uses `b'0'`;
/// `read_tar` only keeps entries whose flag is `b'0'` or `0`.
pub typeflag: u8,
/// Link target (bytes 157..257); at most 100 bytes are serialized.
pub linkname: String,
/// Magic string (bytes 257..263); `TarHeader::new` sets it to `"ustar"`.
pub magic: String,
/// Version string (bytes 263..265); `TarHeader::new` sets it to `"00"`.
pub version: String,
/// Owner user name (bytes 265..297); at most 32 bytes are serialized.
pub uname: String,
/// Owner group name (bytes 297..329); at most 32 bytes are serialized.
pub gname: String,
/// Value of the devmajor field (bytes 329..337, octal text).
pub devmajor: u32,
/// Value of the devminor field (bytes 337..345, octal text).
pub devminor: u32,
/// Name prefix (bytes 345..500); at most 155 bytes are serialized. This file
/// stores it separately and never joins it with `name`.
pub prefix: String,
}
impl TarHeader {
    /// Creates a header describing a regular-file entry (`typeflag` `b'0'`).
    ///
    /// Only `name`, `mode` and `size` are taken from the caller. `magic` is
    /// set to `"ustar"` and `version` to `"00"`; every other numeric field is
    /// zero and every other string field is empty.
    pub fn new(name: String, mode: u32, size: u64) -> Self {
        let magic = String::from("ustar");
        let version = String::from("00");
        Self {
            name,
            mode,
            uid: 0,
            gid: 0,
            size,
            mtime: 0,
            checksum: 0,
            typeflag: b'0',
            linkname: String::new(),
            magic,
            version,
            uname: String::new(),
            gname: String::new(),
            devmajor: 0,
            devminor: 0,
            prefix: String::new(),
        }
    }

    /// Creates a header like [`TarHeader::new`] and additionally sets
    /// `mtime`, `typeflag`, `linkname`, `uname` and `gname`.
    pub fn new_full(
        name: String,
        mode: u32,
        size: u64,
        mtime: u64,
        typeflag: u8,
        linkname: String,
        uname: String,
        gname: String,
    ) -> Self {
        // Start from the defaults chosen by `new` and override the extra fields.
        Self {
            mtime,
            typeflag,
            linkname,
            uname,
            gname,
            ..Self::new(name, mode, size)
        }
    }

    /// Decodes a header from raw block bytes by delegating to
    /// `parse_tar_header`.
    pub fn from_bytes(data: &[u8]) -> Self {
        parse_tar_header(data)
    }

    /// Serializes this header into a 512-byte block by delegating to
    /// `create_tar_header`.
    pub fn to_bytes(&self) -> [u8; 512] {
        create_tar_header(self)
    }

    /// Returns `true` when the checksum that `calc_checksum` computes over
    /// `data` equals the `checksum` field stored in this header.
    ///
    /// `calc_checksum` yields 0 for input shorter than 512 bytes, so such
    /// input only verifies against a stored checksum of 0.
    pub fn verify_checksum(&self, data: &[u8]) -> bool {
        calc_checksum(data) == self.checksum
    }
}
/// One archive member: its decoded header, its payload and a raw header block.
#[derive(Debug)]
pub struct TarEntry {
/// Decoded header fields. `write_tar` serializes the entry from these.
pub header: TarHeader,
/// Payload bytes of the entry.
pub data: Vec<u8>,
/// Raw 512-byte header block. `read_tar` fills it with the bytes found in
/// the archive; `Tar::add_str_entry` leaves it zeroed unless
/// `use_header_parsing` is set. `write_tar` does not read this field.
pub header_bytes: [u8; 512],
}
/// An in-memory tar archive: an ordered list of entries plus one option flag.
#[derive(Debug)]
pub struct Tar {
/// Entries in archive order.
pub entries: Vec<TarEntry>,
/// When `true`, `add_str_entry` and `set_str` store the serialized header in
/// the entry's `header_bytes`; when `false`, `add_str_entry` leaves that
/// buffer zeroed and `set_str` leaves it untouched. Both `Tar::new` and
/// `Tar::from_bytes` initialise it to `false`.
pub use_header_parsing: bool, }
impl Tar {
    /// Creates an empty archive with `use_header_parsing` disabled.
    pub fn new() -> Self {
        Self {
            entries: Vec::new(),
            use_header_parsing: false,
        }
    }

    /// Builds an archive from raw tar bytes using `read_tar`;
    /// `use_header_parsing` starts out disabled.
    pub fn from_bytes(data: &[u8]) -> Self {
        Self {
            entries: read_tar(data),
            use_header_parsing: false,
        }
    }

    /// Appends `entry` as given, without inspecting it.
    pub fn add_entry(&mut self, entry: TarEntry) {
        self.entries.push(entry);
    }

    /// Appends a regular-file entry named `name` (mode `0o664`) whose payload
    /// is the UTF-8 bytes of `content`.
    ///
    /// The entry's `header_bytes` hold the serialized header only when
    /// `use_header_parsing` is set; otherwise they stay zeroed.
    pub fn add_str_entry(&mut self, name: &str, content: &str) {
        let data = content.as_bytes().to_owned();
        let mut header = TarHeader::new(name.to_string(), 0o664, data.len() as u64);
        header.typeflag = b'0';
        let header_bytes = if self.use_header_parsing {
            header.to_bytes()
        } else {
            [0u8; 512]
        };
        self.entries.push(TarEntry {
            header,
            data,
            header_bytes,
        });
    }

    /// Returns the first entry whose header name equals `name`, if any.
    pub fn find_entry(&self, name: &str) -> Option<&TarEntry> {
        self.entries.iter().find(|entry| entry.header.name == name)
    }

    /// Replaces the payload of the first entry named `name` with `content`
    /// and updates its header size; if no such entry exists, one is added via
    /// [`Tar::add_str_entry`].
    pub fn set_str(&mut self, name: &str, content: &str) {
        let existing = self
            .entries
            .iter()
            .position(|entry| entry.header.name == name);
        match existing {
            Some(index) => {
                let refresh_header_bytes = self.use_header_parsing;
                let entry = &mut self.entries[index];
                entry.data = content.as_bytes().to_owned();
                entry.header.size = entry.data.len() as u64;
                if refresh_header_bytes {
                    entry.header_bytes = entry.header.to_bytes();
                }
            }
            None => self.add_str_entry(name, content),
        }
    }

    /// Returns the payload of the first entry named `name` as text.
    ///
    /// Invalid UTF-8 is replaced lossily and trailing NUL characters are
    /// removed. Returns `None` when no entry has that name.
    pub fn get_str(&self, name: &str) -> Option<String> {
        self.find_entry(name).map(|entry| {
            String::from_utf8_lossy(&entry.data)
                .trim_end_matches('\0')
                .to_string()
        })
    }

    /// Serializes all entries into tar bytes using `write_tar`.
    pub fn to_bytes(&self) -> Vec<u8> {
        write_tar(&self.entries)
    }
}
/// Parses the regular-file entries out of raw tar archive bytes.
///
/// The archive is walked block by block: each 512-byte header is decoded with
/// `parse_tar_header`, followed by `size` payload bytes and padding up to the
/// next 512-byte boundary. Only entries whose `typeflag` is `b'0'` or `0` are
/// returned; every other entry type is skipped.
///
/// Parsing stops, returning what was collected so far, at the first all-zero
/// header block, when fewer than 512 bytes remain, or when a header claims
/// more payload than the input still contains.
pub fn read_tar(data: &[u8]) -> Vec<TarEntry> {
    const BLOCK: usize = 512;

    let mut entries = Vec::new();
    let mut offset = 0;
    while offset + BLOCK <= data.len() {
        let header_data = &data[offset..offset + BLOCK];
        if is_empty_block(header_data) {
            break;
        }
        let header = parse_tar_header(header_data);

        // Compare in u64 before narrowing, so that an oversized `size` can
        // neither be truncated by the cast nor overflow the end offset.
        let data_start = offset + BLOCK;
        let remaining = data.len() - data_start;
        if header.size > remaining as u64 {
            break;
        }
        let size = header.size as usize;
        let data_end = data_start + size;

        // Copy the payload only for entries that are actually kept.
        if header.typeflag == b'0' || header.typeflag == 0 {
            let mut header_bytes = [0u8; BLOCK];
            header_bytes.copy_from_slice(header_data);
            entries.push(TarEntry {
                header,
                data: data[data_start..data_end].to_vec(),
                header_bytes,
            });
        }

        let padding = (BLOCK - size % BLOCK) % BLOCK;
        offset = data_end + padding;
    }
    entries
}
/// Returns `true` when `data` contains no non-zero byte (an empty slice
/// counts as empty).
fn is_empty_block(data: &[u8]) -> bool {
    !data.iter().any(|&byte| byte != 0)
}
use std::ops::Range;
/// Extracts the text stored in the header field `data[range]`.
///
/// Header fields are NUL-terminated, so the value ends at the first NUL byte;
/// anything after it is ignored. The remaining bytes are decoded as UTF-8
/// (invalid sequences are replaced lossily) and surrounding whitespace is
/// trimmed.
///
/// # Panics
///
/// Panics if `range` is out of bounds for `data`.
fn read_tar_str(data: &[u8], range: Range<usize>) -> String {
    let field = &data[range];
    // Cut at the terminator rather than only stripping trailing NULs, so
    // that bytes hidden behind an embedded NUL never leak into the value.
    let value = match field.iter().position(|&byte| byte == 0) {
        Some(nul) => &field[..nul],
        None => field,
    };
    String::from_utf8_lossy(value).trim().to_string()
}
/// Reads the header field `data[range]` as octal text and returns it as `u32`.
///
/// The text is obtained through `read_tar_str`. An empty field, or text that
/// is not a valid octal `u32`, yields 0.
fn read_tar_u32(data: &[u8], range: Range<usize>) -> u32 {
    let digits = read_tar_str(data, range);
    // `from_str_radix` rejects the empty string, so an empty field takes the
    // same zero fallback as malformed text.
    u32::from_str_radix(&digits, 8).unwrap_or_default()
}
/// Reads the header field `data[range]` as octal text and returns it as `u64`.
///
/// The text is obtained through `read_tar_str`. An empty field, or text that
/// is not a valid octal `u64`, yields 0.
fn read_tar_u64(data: &[u8], range: Range<usize>) -> u64 {
    let digits = read_tar_str(data, range);
    // Parsing an empty string fails, which lands on the same zero fallback.
    match u64::from_str_radix(digits.as_str(), 8) {
        Ok(value) => value,
        Err(_) => 0,
    }
}
/// Reads the checksum field `data[range]` as octal text and returns it as `u32`.
///
/// On top of what `read_tar_str` already strips, whitespace and trailing NUL
/// characters are removed once more, which copes with fields that end in a
/// NUL followed by a space. An empty or non-octal field yields 0.
fn read_tar_checksum(data: &[u8], range: Range<usize>) -> u32 {
    let raw = read_tar_str(data, range);
    let digits = raw.trim().trim_end_matches('\0').trim();
    // An empty string fails to parse and therefore also maps to 0.
    u32::from_str_radix(digits, 8).unwrap_or_default()
}
/// Decodes the fields of one tar header block into a [`TarHeader`].
///
/// Only the first 512 bytes of `data` are examined. Input shorter than a full
/// block is treated as if it were padded with NUL bytes, so a truncated
/// header decodes to empty/zero fields instead of panicking on an
/// out-of-bounds index.
///
/// String fields are read with `read_tar_str`; numeric fields are octal text
/// read with `read_tar_u32`, `read_tar_u64` and `read_tar_checksum`, all of
/// which fall back to 0 for empty or malformed text.
fn parse_tar_header(data: &[u8]) -> TarHeader {
    // Work on a zero-padded copy so every fixed field offset is in bounds.
    let mut block = [0u8; 512];
    let available = data.len().min(block.len());
    block[..available].copy_from_slice(&data[..available]);
    let data: &[u8] = &block;

    TarHeader {
        name: read_tar_str(data, 0..100),
        mode: read_tar_u32(data, 100..108),
        uid: read_tar_u32(data, 108..116),
        gid: read_tar_u32(data, 116..124),
        size: read_tar_u64(data, 124..136),
        mtime: read_tar_u64(data, 136..148),
        checksum: read_tar_checksum(data, 148..156),
        typeflag: data[156],
        linkname: read_tar_str(data, 157..257),
        magic: read_tar_str(data, 257..263),
        version: read_tar_str(data, 263..265),
        uname: read_tar_str(data, 265..297),
        gname: read_tar_str(data, 297..329),
        devmajor: read_tar_u32(data, 329..337),
        devminor: read_tar_u32(data, 337..345),
        prefix: read_tar_str(data, 345..500),
    }
}
/// Serializes `entries` into tar archive bytes.
///
/// For each entry the header block is regenerated from `entry.header` with
/// `create_tar_header` (the entry's `header_bytes` are not used), followed by
/// the payload zero-padded to a multiple of 512 bytes. The archive ends with
/// 1024 zero bytes.
pub fn write_tar(entries: &[TarEntry]) -> Vec<u8> {
    const BLOCK: usize = 512;

    let mut archive = Vec::new();
    for entry in entries {
        archive.extend_from_slice(&create_tar_header(&entry.header));
        archive.extend_from_slice(&entry.data);

        // Pad the payload up to the next block boundary.
        let remainder = entry.data.len() % BLOCK;
        if remainder != 0 {
            let padded_len = archive.len() + (BLOCK - remainder);
            archive.resize(padded_len, 0);
        }
    }

    // End-of-archive marker: two all-zero blocks.
    let total_len = archive.len() + 2 * BLOCK;
    archive.resize(total_len, 0);
    archive
}
/// Serializes `header` into a 512-byte tar header block.
///
/// Field layout (offset, width): name (0, 100), mode (100, 8), uid (108, 8),
/// gid (116, 8), size (124, 12), mtime (136, 12), checksum (148, 8),
/// typeflag (156, 1), linkname (157, 100), magic (257, 6), version (263, 2),
/// uname (265, 32), gname (297, 32), devmajor (329, 8), devminor (337, 8),
/// prefix (345, 155). Unused bytes stay zero.
///
/// Numeric fields are written as unpadded octal text. Every field, string or
/// numeric, is cut off at its width, so an oversized value is truncated
/// instead of spilling into the neighbouring field.
///
/// `header.checksum` is ignored: the checksum is recomputed with
/// `calc_checksum` over the finished block and stored as six octal digits
/// followed by a NUL and a space.
fn create_tar_header(header: &TarHeader) -> [u8; 512] {
    // Copies `src` into `block` at `start`, keeping at most `width` bytes.
    fn put_bytes(block: &mut [u8; 512], start: usize, width: usize, src: &[u8]) {
        let len = src.len().min(width);
        block[start..start + len].copy_from_slice(&src[..len]);
    }

    // Writes `value` as unpadded octal text into the `width`-byte field at `start`.
    fn put_octal(block: &mut [u8; 512], start: usize, width: usize, value: u64) {
        put_bytes(block, start, width, format!("{:o}", value).as_bytes());
    }

    let mut data = [0u8; 512];
    put_bytes(&mut data, 0, 100, header.name.as_bytes());
    put_octal(&mut data, 100, 8, u64::from(header.mode));
    put_octal(&mut data, 108, 8, u64::from(header.uid));
    put_octal(&mut data, 116, 8, u64::from(header.gid));
    put_octal(&mut data, 124, 12, header.size);
    put_octal(&mut data, 136, 12, header.mtime);
    data[156] = header.typeflag;
    put_bytes(&mut data, 157, 100, header.linkname.as_bytes());
    put_bytes(&mut data, 257, 6, header.magic.as_bytes());
    put_bytes(&mut data, 263, 2, header.version.as_bytes());
    put_bytes(&mut data, 265, 32, header.uname.as_bytes());
    put_bytes(&mut data, 297, 32, header.gname.as_bytes());
    put_octal(&mut data, 329, 8, u64::from(header.devmajor));
    put_octal(&mut data, 337, 8, u64::from(header.devminor));
    put_bytes(&mut data, 345, 155, header.prefix.as_bytes());

    // The checksum is computed last; `calc_checksum` counts the checksum
    // field itself as spaces, so it is still zero-filled at this point.
    let checksum = calc_checksum(&data);
    put_bytes(&mut data, 148, 8, format!("{:06o}\0 ", checksum).as_bytes());
    data
}
/// Computes the checksum of a tar header block.
///
/// The result is the sum of the first 512 bytes of `data`, with the eight
/// bytes of the checksum field (offsets 148..156) counted as ASCII spaces
/// regardless of their actual content. Bytes beyond the first 512 are
/// ignored. Returns 0 when `data` is shorter than 512 bytes.
pub fn calc_checksum(data: &[u8]) -> u32 {
    let block = match data.get(..512) {
        Some(block) => block,
        None => return 0,
    };
    let (before_field, rest) = block.split_at(148);
    let after_field = &rest[8..];
    // The checksum field always contributes eight spaces.
    let field_as_spaces = 8 * u32::from(b' ');
    before_field
        .iter()
        .chain(after_field)
        .fold(field_as_spaces, |sum, &byte| sum + u32::from(byte))
}
// Unit tests for the tar reader/writer. `checksum_test` and `read_test`
// depend on the fixture file `../testdata/test.tar`; every other test builds
// its archive in memory.
#[cfg(test)]
mod tests {
use super::*;
// The checksum stored in the fixture's `test.txt` header must match the
// checksum computed over that header's raw bytes.
#[test]
fn checksum_test() {
let data = include_bytes!("../testdata/test.tar");
let entries = read_tar(data);
let test_entry = entries.iter().find(|e| e.header.name == "test.txt").unwrap();
let calculated_checksum = test_entry.header.verify_checksum(&test_entry.header_bytes);
assert!(calculated_checksum, "Checksum verification failed");
}
// Exercises the `Tar` API: add_str_entry, set_str (overwrite in place),
// get_str, add_entry, find_entry, and a to_bytes/from_bytes round trip.
#[test]
fn tar_methods_test() {
let mut tar = Tar::new();
assert_eq!(tar.entries.len(), 0);
tar.add_str_entry("foo.txt", "hello");
assert_eq!(tar.entries.len(), 1);
assert_eq!(tar.entries[0].header.name, "foo.txt");
assert_eq!(String::from_utf8_lossy(&tar.entries[0].data), "hello");
tar.set_str("foo.txt", "world");
assert_eq!(tar.entries.len(), 1); assert_eq!(tar.get_str("foo.txt").as_deref(), Some("world"));
let header = TarHeader::new("bar.txt".to_string(), 0o644, 3);
let data = b"abc".to_vec();
let header_bytes = header.to_bytes();
let entry = TarEntry { header, data: data.clone(), header_bytes };
tar.add_entry(entry);
assert_eq!(tar.entries.len(), 2);
assert_eq!(tar.get_str("bar.txt").as_deref(), Some("abc"));
let found = tar.find_entry("foo.txt");
assert!(found.is_some());
assert_eq!(String::from_utf8_lossy(&found.unwrap().data), "world");
let mut tar1 = Tar::new();
tar1.add_str_entry("foo.txt", "foo");
tar1.add_str_entry("bar.txt", "bar");
let bytes = tar1.to_bytes();
println!("Bytes length: {}", bytes.len());
let tar2 = Tar::from_bytes(&bytes);
println!("Tar2: {:?}", tar2.entries);
assert_eq!(tar2.entries.len(), 2);
assert_eq!(tar2.get_str("foo.txt").as_deref(), Some("foo"));
assert_eq!(tar2.get_str("bar.txt").as_deref(), Some("bar"));
}
// Reads the fixture archive and checks the name, size and content of
// `test.txt`.
#[test]
fn read_test() {
let data = include_bytes!("../testdata/test.tar");
let entries = read_tar(data);
for e in &entries {
println!("{:?}", e);
}
let test_entry = entries.iter().find(|e| e.header.name == "test.txt");
assert!(test_entry.is_some());
let test_entry = test_entry.unwrap();
assert_eq!(test_entry.header.name, "test.txt");
assert_eq!(test_entry.header.size, 33);
assert_eq!(String::from_utf8_lossy(&test_entry.data), "0123456789ABCDEF__This is a pen.\n");
}
// Writes a single entry with `write_tar` and reads it back with `read_tar`.
#[test]
fn write_test() {
let mut entries: Vec<TarEntry> = Vec::new();
let header = TarHeader::new("hoge.txt".to_string(), 0o644, 12);
assert_eq!(header.name, "hoge.txt");
assert_eq!(header.mode, 0o644);
assert_eq!(header.size, 12);
let data = b"Hello, World".to_vec();
let header_bytes = header.to_bytes();
entries.push(TarEntry { header, data, header_bytes });
let tar_data = write_tar(&entries);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 1);
assert_eq!(read_entries[0].header.name, "hoge.txt");
assert_eq!(read_entries[0].header.size, 12);
assert_eq!(read_entries[0].data, b"Hello, World");
}
// A 200-byte name must survive a round trip with the name cut to at most
// 100 bytes and the payload intact.
// NOTE(review): the `null_pos <= 100` assertion cannot fail, because the
// position is searched in a 100-byte slice and defaults to 100.
#[test]
fn security_test_oversized_name() {
let long_name = "a".repeat(200); let header = TarHeader::new(long_name.clone(), 0o644, 10);
let header_bytes = header.to_bytes();
let name_field = &header_bytes[0..100];
let null_pos = name_field.iter().position(|&b| b == 0).unwrap_or(100);
assert!(null_pos <= 100, "Name field should not exceed 100 bytes");
let data = b"Test data!".to_vec();
let entry = TarEntry { header, data: data.clone(), header_bytes };
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 1);
assert_eq!(read_entries[0].data, data);
assert!(read_entries[0].header.name.len() <= 100);
}
// A 200-byte prefix must survive a round trip with the prefix cut to at most
// 155 bytes and the payload intact.
// NOTE(review): the `null_pos <= 155` assertion cannot fail, for the same
// reason as in the oversized-name test.
#[test]
fn security_test_oversized_prefix() {
let long_prefix = "b".repeat(200); let mut header = TarHeader::new("test.txt".to_string(), 0o644, 10);
header.prefix = long_prefix;
let header_bytes = header.to_bytes();
let prefix_field = &header_bytes[345..500];
let null_pos = prefix_field.iter().position(|&b| b == 0).unwrap_or(155);
assert!(null_pos <= 155, "Prefix field should not exceed 155 bytes");
let data = b"Test data!".to_vec();
let entry = TarEntry { header, data: data.clone(), header_bytes };
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 1);
assert_eq!(read_entries[0].data, data);
assert!(read_entries[0].header.prefix.len() <= 155);
}
// A name containing embedded NUL bytes must not break the round trip; only
// the entry count and payload are checked, not the name that is read back.
#[test]
fn security_test_special_characters() {
let special_name = "test\0file\x00name.txt";
let header = TarHeader::new(special_name.to_string(), 0o644, 5);
let header_bytes = header.to_bytes();
let data = b"Hello".to_vec();
let entry = TarEntry { header, data: data.clone(), header_bytes };
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 1);
assert_eq!(read_entries[0].data, data);
}
// Oversizes name, prefix, uname, gname and linkname at once; the serialized
// fields must be non-empty and the entry must still round-trip.
#[test]
fn security_test_all_fields_oversized() {
let long_name = "n".repeat(150);
let long_prefix = "p".repeat(200);
let long_uname = "u".repeat(50);
let long_gname = "g".repeat(50);
let long_linkname = "l".repeat(150);
let data = b"Test".to_vec();
let header = TarHeader::new_full(
long_name,
0o644,
data.len() as u64, 0,
b'0',
long_linkname,
long_uname,
long_gname,
);
let mut header_with_prefix = header;
header_with_prefix.prefix = long_prefix;
let header_bytes = header_with_prefix.to_bytes();
assert!(header_bytes[0..100].iter().any(|&b| b != 0), "Name field should have data");
assert!(header_bytes[265..297].iter().any(|&b| b != 0), "Uname field should have data");
assert!(header_bytes[297..329].iter().any(|&b| b != 0), "Gname field should have data");
assert!(header_bytes[345..500].iter().any(|&b| b != 0), "Prefix field should have data");
let entry = TarEntry { header: header_with_prefix, data: data.clone(), header_bytes };
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 1);
assert_eq!(read_entries[0].data, b"Test");
}
// Path-traversal style names round-trip without being sanitised: the final
// assertion requires the name read back to still contain ".." or start
// with '/'.
#[test]
fn security_test_path_traversal_attack() {
let malicious_names = vec![
"../../../etc/passwd",
"../../secret.txt",
"subdir/../../outside.txt",
"/absolute/path/file.txt",
"..\\..\\windows\\path.txt",
];
for malicious_name in malicious_names {
let header = TarHeader::new(malicious_name.to_string(), 0o644, 10);
let data = b"malicious!".to_vec();
let header_bytes = header.to_bytes();
let entry = TarEntry { header, data: data.clone(), header_bytes };
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 1);
assert_eq!(read_entries[0].data, data);
assert!(read_entries[0].header.name.contains("..") || read_entries[0].header.name.starts_with('/'));
}
}
// The header claims 1,000,000 payload bytes but only four are written;
// reading must not panic and yields at most one entry.
#[test]
fn security_test_size_mismatch() {
let header = TarHeader::new("fake_size.txt".to_string(), 0o644, 1000000); let data = b"tiny".to_vec(); let header_bytes = header.to_bytes();
let entry = TarEntry {
header,
data: data.clone(),
header_bytes
};
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert!(read_entries.len() <= 1);
}
// A header size of `u64::MAX` must not make the reader return a large
// payload: either nothing is read or the payload is shorter than 10 bytes.
#[test]
fn security_test_integer_overflow() {
let header = TarHeader::new("overflow.txt".to_string(), 0o644, u64::MAX);
let data = b"small".to_vec();
let header_bytes = header.to_bytes();
let entry = TarEntry { header, data, header_bytes };
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert!(read_entries.is_empty() || read_entries[0].data.len() < 10);
}
// Names with NUL bytes in the middle, at the end and at the start must all
// round-trip as exactly one entry with an intact payload.
#[test]
fn security_test_null_byte_injection() {
let names_with_nulls = vec![
"file\0hidden.txt",
"normal.txt\0\0\0",
"\0start_null.txt",
];
for name_with_null in names_with_nulls {
let header = TarHeader::new(name_with_null.to_string(), 0o644, 5);
let data = b"test!".to_vec();
let header_bytes = header.to_bytes();
let entry = TarEntry { header, data: data.clone(), header_bytes };
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 1);
assert_eq!(read_entries[0].data, data);
}
}
// Overwrites the first two bytes of the first header's checksum field
// (archive offsets 148 and 149) with '9', which is not an octal digit, and
// expects checksum verification of the re-read entry to fail.
#[test]
fn security_test_invalid_checksum() {
let mut tar = Tar::new();
tar.use_header_parsing = true;
tar.add_str_entry("test.txt", "test data!");
let mut tar_data = tar.to_bytes();
tar_data[148] = b'9'; tar_data[149] = b'9'; let tar2 = Tar::from_bytes(&tar_data);
assert!(!tar2.entries[0].header.verify_checksum(&tar2.entries[0].header_bytes));
}
// An entry with typeflag '2' and a link target is dropped by `read_tar`.
#[test]
fn security_test_symlink_in_archive() {
let mut header = TarHeader::new("symlink.txt".to_string(), 0o777, 0);
header.typeflag = b'2'; header.linkname = "/etc/passwd".to_string();
let header_bytes = header.to_bytes();
let entry = TarEntry {
header,
data: Vec::new(),
header_bytes
};
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 0);
}
// Entries with typeflags '3', '4', '5' and '6' are dropped by `read_tar`.
#[test]
fn security_test_device_file_in_archive() {
let test_cases = vec![
(b'3', "char_device"), (b'4', "block_device"), (b'5', "directory"), (b'6', "fifo"), ];
for (typeflag, name) in test_cases {
let mut header = TarHeader::new(name.to_string(), 0o644, 0);
header.typeflag = typeflag;
let header_bytes = header.to_bytes();
let entry = TarEntry {
header,
data: Vec::new(),
header_bytes
};
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 0, "Typeflag {} should be filtered", typeflag);
}
}
// A 108-byte path ("a/" repeated 50 times plus "file.txt") round-trips with
// the name cut to at most 100 bytes.
#[test]
fn security_test_deeply_nested_path() {
let deep_path = "a/".repeat(50) + "file.txt"; let header = TarHeader::new(deep_path.clone(), 0o644, 4);
let data = b"deep".to_vec();
let header_bytes = header.to_bytes();
let entry = TarEntry { header, data: data.clone(), header_bytes };
let tar_data = write_tar(&[entry]);
let read_entries = read_tar(&tar_data);
assert_eq!(read_entries.len(), 1);
assert_eq!(read_entries[0].data, data);
assert!(read_entries[0].header.name.len() <= 100);
}
// A hand-built archive whose header claims 1000 payload bytes but that ends
// after 5 must not panic; either nothing is read or at most 5 bytes are.
#[test]
fn security_test_malformed_archive_early_termination() {
let header = TarHeader::new("incomplete.txt".to_string(), 0o644, 1000);
let data = b"short".to_vec(); let header_bytes = header.to_bytes();
let mut tar_data = Vec::new();
tar_data.extend_from_slice(&header_bytes);
tar_data.extend_from_slice(&data);
let read_entries = read_tar(&tar_data);
assert!(read_entries.is_empty() || read_entries[0].data.len() <= 5);
}
}