use alloc::collections::BTreeMap;
use alloc::string::{String, ToString};
use alloc::vec::Vec;
use super::types::{ArchiveEntry, ArchiveError, CompressionMethod, ExtractResult};
/// Tar headers and payload padding are laid out in fixed 512-byte blocks.
const TAR_BLOCK_SIZE: usize = 512;
/// POSIX ustar magic at header offset 257: "ustar" followed by NUL
/// (the version field then holds "00").
const USTAR_MAGIC: &[u8; 6] = b"ustar\0";
/// Old GNU tar magic: "ustar" followed by a space (version " \0").
/// The previous values of these two constants were swapped, which made
/// `TarHeader::to_bytes` emit the invalid combination "ustar 00".
const GNU_MAGIC: &[u8; 6] = b"ustar ";
/// Entry kind decoded from the tar header typeflag byte (offset 156).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TarType {
    RegularFile,
    HardLink,
    SymLink,
    CharDevice,
    BlockDevice,
    Directory,
    Fifo,
    /// Any typeflag byte this parser does not recognize, preserved verbatim.
    Unknown(u8),
}

impl TarType {
    /// Maps a raw typeflag byte to a `TarType`.
    ///
    /// Both `'0'` and NUL denote a regular file (pre-POSIX archives used
    /// NUL). Type `'7'` (contiguous file) is also mapped to
    /// `RegularFile`, as POSIX permits extracting it as a regular file.
    fn from_byte(b: u8) -> Self {
        match b {
            b'0' | 0 | b'7' => TarType::RegularFile,
            b'1' => TarType::HardLink,
            b'2' => TarType::SymLink,
            b'3' => TarType::CharDevice,
            b'4' => TarType::BlockDevice,
            b'5' => TarType::Directory,
            b'6' => TarType::Fifo,
            other => TarType::Unknown(other),
        }
    }

    /// Canonical on-disk typeflag byte for this entry kind.
    fn as_byte(self) -> u8 {
        match self {
            TarType::RegularFile => b'0',
            TarType::HardLink => b'1',
            TarType::SymLink => b'2',
            TarType::CharDevice => b'3',
            TarType::BlockDevice => b'4',
            TarType::Directory => b'5',
            TarType::Fifo => b'6',
            TarType::Unknown(b) => b,
        }
    }
}
/// In-memory representation of one tar header block, with all numeric
/// fields already decoded from their on-disk octal text form.
#[derive(Debug, Clone)]
pub struct TarHeader {
    /// Entry path. `from_bytes` stores the FULL path here: any ustar
    /// prefix field is pre-joined with '/'.
    pub name: String,
    /// Unix permission bits.
    pub mode: u32,
    /// Owner user id.
    pub uid: u32,
    /// Owner group id.
    pub gid: u32,
    /// Payload size in bytes.
    pub size: u64,
    /// Modification time as stored in the header (conventionally seconds
    /// since the Unix epoch).
    pub mtime: u64,
    /// Entry kind decoded from the typeflag byte.
    pub typeflag: TarType,
    /// Link target for hard/sym links; empty for other entry kinds.
    pub linkname: String,
    /// Owner user name (ustar extension; empty for pre-POSIX headers).
    pub uname: String,
    /// Owner group name (ustar extension; empty for pre-POSIX headers).
    pub gname: String,
    /// Device major number (character/block device entries).
    pub devmajor: u32,
    /// Device minor number (character/block device entries).
    pub devminor: u32,
    /// Raw ustar path prefix. Left empty by `from_bytes`, which folds the
    /// prefix into `name` instead; `to_bytes` recomputes the split.
    pub prefix: String,
}
impl TarHeader {
pub fn from_bytes(data: &[u8]) -> Option<Self> {
if data.len() < TAR_BLOCK_SIZE {
return None;
}
if data[..TAR_BLOCK_SIZE].iter().all(|&b| b == 0) {
return None;
}
let stored_checksum = parse_octal(&data[148..156]);
let computed_checksum = compute_checksum(data);
if stored_checksum != computed_checksum {
let alt_checksum = compute_checksum_with_spaces(data);
if stored_checksum != alt_checksum {
return None;
}
}
let name = parse_string(&data[0..100]);
let mode = parse_octal(&data[100..108]) as u32;
let uid = parse_octal(&data[108..116]) as u32;
let gid = parse_octal(&data[116..124]) as u32;
let size = parse_octal(&data[124..136]) as u64;
let mtime = parse_octal(&data[136..148]) as u64;
let typeflag = TarType::from_byte(data[156]);
let linkname = parse_string(&data[157..257]);
let (uname, gname, devmajor, devminor, prefix) =
if &data[257..263] == USTAR_MAGIC || &data[257..263] == GNU_MAGIC {
(
parse_string(&data[265..297]),
parse_string(&data[297..329]),
parse_octal(&data[329..337]) as u32,
parse_octal(&data[337..345]) as u32,
parse_string(&data[345..500]),
)
} else {
(String::new(), String::new(), 0, 0, String::new())
};
let full_name = if !prefix.is_empty() {
alloc::format!("{}/{}", prefix, name)
} else {
name
};
Some(Self {
name: full_name,
mode,
uid,
gid,
size,
mtime,
typeflag,
linkname,
uname,
gname,
devmajor,
devminor,
prefix: String::new(), })
}
pub fn to_bytes(&self) -> [u8; TAR_BLOCK_SIZE] {
let mut header = [0u8; TAR_BLOCK_SIZE];
let (name_part, prefix_part) = if self.name.len() > 100 {
let split_pos = self.name.len().saturating_sub(100);
(&self.name[split_pos..], &self.name[..split_pos])
} else {
(self.name.as_str(), "")
};
write_string(&mut header[0..100], name_part);
write_octal(&mut header[100..108], self.mode as usize);
write_octal(&mut header[108..116], self.uid as usize);
write_octal(&mut header[116..124], self.gid as usize);
write_octal(&mut header[124..136], self.size as usize);
write_octal(&mut header[136..148], self.mtime as usize);
header[148..156].copy_from_slice(b" ");
header[156] = self.typeflag.as_byte();
write_string(&mut header[157..257], &self.linkname);
header[257..263].copy_from_slice(USTAR_MAGIC);
header[263..265].copy_from_slice(b"00");
write_string(&mut header[265..297], &self.uname);
write_string(&mut header[297..329], &self.gname);
write_octal(&mut header[329..337], self.devmajor as usize);
write_octal(&mut header[337..345], self.devminor as usize);
write_string(&mut header[345..500], prefix_part);
let checksum = compute_checksum(&header);
write_octal(&mut header[148..155], checksum);
header[155] = 0;
header
}
}
/// Decodes a NUL-terminated tar text field.
///
/// Reads up to the first NUL (or the whole field if none), converts
/// lossily from UTF-8, and strips surrounding whitespace.
fn parse_string(field: &[u8]) -> String {
    let terminated = match field.iter().position(|&b| b == 0) {
        Some(nul) => &field[..nul],
        None => field,
    };
    String::from_utf8_lossy(terminated).trim().to_string()
}
/// Parses an octal numeric tar field, tolerating NUL/space termination.
///
/// Returns 0 for empty or malformed fields rather than failing, matching
/// tar's traditionally lenient numeric handling.
fn parse_octal(field: &[u8]) -> usize {
    // parse_string already cuts at the first NUL and trims whitespace,
    // so the second trim the old code performed was redundant.
    let s = parse_string(field);
    if s.is_empty() {
        return 0;
    }
    usize::from_str_radix(&s, 8).unwrap_or(0)
}
/// Copies `value` into `field`, truncating if it does not fit.
///
/// Bytes of `field` past the end of `value` are left untouched (they are
/// zero in a freshly initialized header, giving NUL termination).
fn write_string(field: &mut [u8], value: &str) {
    for (slot, byte) in field.iter_mut().zip(value.bytes()) {
        *slot = byte;
    }
}
/// Writes `value` as zero-padded octal text into `field`, leaving the
/// final byte untouched as a NUL/space terminator.
///
/// If `value` needs more digits than fit, the field is saturated to all
/// '7's (the largest representable value). The old code silently copied
/// only the most-significant digits — corrupting the value by orders of
/// magnitude — and overwrote the terminator byte in that case.
fn write_octal(field: &mut [u8], value: usize) {
    // saturating_sub also protects against a zero-length field, where the
    // old `field.len() - 1` would underflow and panic.
    let width = field.len().saturating_sub(1);
    let s = alloc::format!("{:0>width$o}", value, width = width);
    if s.len() > width {
        // Value does not fit: saturate rather than corrupt.
        for b in field[..width].iter_mut() {
            *b = b'7';
        }
    } else {
        field[..width].copy_from_slice(s.as_bytes());
    }
}
/// POSIX tar header checksum: the unsigned byte sum of all 512 header
/// bytes, with the checksum field itself (bytes 148..156) counted as
/// ASCII spaces.
fn compute_checksum(header: &[u8]) -> usize {
    header[..TAR_BLOCK_SIZE]
        .iter()
        .enumerate()
        .map(|(i, &byte)| {
            if (148..156).contains(&i) {
                b' ' as usize
            } else {
                byte as usize
            }
        })
        .sum()
}
/// Legacy checksum variant: sums all 512 header bytes exactly as stored,
/// WITHOUT substituting spaces for the checksum field. Some very old tar
/// writers computed their checksum this way; `from_bytes` accepts it as a
/// fallback.
///
/// NOTE(review): the name is misleading — `compute_checksum` is the one
/// that substitutes spaces, not this function. Renaming would touch the
/// caller, so the name is documented rather than changed.
fn compute_checksum_with_spaces(header: &[u8]) -> usize {
    header[..TAR_BLOCK_SIZE].iter().map(|&b| b as usize).sum()
}
/// LZ4-compresses `data` with the uncompressed size prepended
/// (lz4_flex "size-prepended" framing, required by `decompress_lz4`).
fn compress_lz4(data: &[u8]) -> Vec<u8> {
    lz4_flex::compress_prepend_size(data)
}
/// Reverses `compress_lz4`. Any lz4_flex failure (bad framing, corrupt
/// stream) is collapsed into `ArchiveError::DecompressError`.
fn decompress_lz4(data: &[u8]) -> Result<Vec<u8>, ArchiveError> {
    lz4_flex::decompress_size_prepended(data).map_err(|_| ArchiveError::DecompressError)
}
/// Builds an uncompressed tar archive in memory from `(path, contents)`
/// pairs. Every entry is written as a regular file with mode 0o644,
/// uid/gid 0, owner "root:root", and mtime 0.
pub fn create_tar(files: &[(&str, &[u8])]) -> Result<Vec<u8>, ArchiveError> {
    let mut out = Vec::new();
    for &(path, contents) in files {
        let header = TarHeader {
            name: path.to_string(),
            mode: 0o644,
            uid: 0,
            gid: 0,
            size: contents.len() as u64,
            mtime: 0,
            typeflag: TarType::RegularFile,
            linkname: String::new(),
            uname: "root".to_string(),
            gname: "root".to_string(),
            devmajor: 0,
            devminor: 0,
            prefix: String::new(),
        };
        out.extend_from_slice(&header.to_bytes());
        out.extend_from_slice(contents);
        // Pad the payload out to a whole 512-byte block.
        let padding = (TAR_BLOCK_SIZE - (contents.len() % TAR_BLOCK_SIZE)) % TAR_BLOCK_SIZE;
        out.resize(out.len() + padding, 0);
    }
    // Two zero blocks terminate the archive.
    out.resize(out.len() + TAR_BLOCK_SIZE * 2, 0);
    Ok(out)
}
/// Builds a tar archive from `files` and LZ4-compresses the result.
pub fn create_tar_lz4(files: &[(&str, &[u8])]) -> Result<Vec<u8>, ArchiveError> {
    create_tar(files).map(|tar| compress_lz4(&tar))
}
/// Scans an in-memory tar byte stream and indexes its entries by name.
///
/// Scanning stops at the first block that fails to parse — including the
/// all-zero end-of-archive marker — and returns whatever was collected so
/// far. Later duplicate names overwrite earlier ones ("last entry wins").
pub fn parse_tar(data: &[u8]) -> Result<BTreeMap<String, ArchiveEntry>, ArchiveError> {
    let mut entries = BTreeMap::new();
    let mut offset = 0usize;
    while data.len().saturating_sub(offset) >= TAR_BLOCK_SIZE {
        let header = match TarHeader::from_bytes(&data[offset..offset + TAR_BLOCK_SIZE]) {
            Some(h) => h,
            // End-of-archive marker or corrupt header: stop scanning.
            None => break,
        };
        let entry = ArchiveEntry {
            name: header.name.clone(),
            is_dir: header.typeflag == TarType::Directory,
            size: header.size,
            // Tar stores payloads uncompressed, so both sizes match.
            compressed_size: header.size,
            mtime: header.mtime,
            mode: header.mode,
            // The payload begins immediately after the 512-byte header.
            offset: (offset + TAR_BLOCK_SIZE) as u64,
            compression: CompressionMethod::Store,
            crc32: 0,
        };
        entries.insert(header.name, entry);
        // Advance past the header plus the payload rounded up to whole
        // blocks. Checked arithmetic (in u64, before converting to usize)
        // ensures a corrupt size field cannot wrap the offset — the old
        // unchecked `+=` would panic in debug builds or wrap in release.
        let payload_blocks = header.size.div_ceil(TAR_BLOCK_SIZE as u64);
        let advance = payload_blocks
            .checked_mul(TAR_BLOCK_SIZE as u64)
            .and_then(|p| p.checked_add(TAR_BLOCK_SIZE as u64))
            .and_then(|a| usize::try_from(a).ok())
            .and_then(|a| offset.checked_add(a));
        match advance {
            Some(next) => offset = next,
            None => break,
        }
    }
    Ok(entries)
}
/// LZ4-decompresses `data`, then indexes the contained tar archive.
pub fn parse_tar_lz4(data: &[u8]) -> Result<BTreeMap<String, ArchiveEntry>, ArchiveError> {
    parse_tar(&decompress_lz4(data)?)
}
/// Stub: listing an archive from a filesystem path is not implemented;
/// the path is ignored and an empty map is always returned.
///
/// NOTE(review): callers receive `Ok(empty)` rather than an error —
/// confirm that silently reporting no entries is the intended contract
/// for an unimplemented backend.
pub fn parse_tar_directory(path: &str) -> Result<BTreeMap<String, ArchiveEntry>, ArchiveError> {
    let _ = path;
    Ok(BTreeMap::new())
}
/// Copies a single entry's payload out of the raw (uncompressed) archive
/// bytes, using the offset/size recorded in `entry`.
///
/// # Errors
/// Returns `ArchiveError::InvalidFormat` when the entry's range does not
/// lie inside `archive_data` — including when `offset + size` would
/// overflow, or when the stored u64 values do not fit `usize` (the old
/// code truncated them with `as` and could overflow the bounds check).
pub fn extract_file(archive_data: &[u8], entry: &ArchiveEntry) -> Result<Vec<u8>, ArchiveError> {
    let offset = usize::try_from(entry.offset).map_err(|_| ArchiveError::InvalidFormat)?;
    let size = usize::try_from(entry.size).map_err(|_| ArchiveError::InvalidFormat)?;
    let end = offset
        .checked_add(size)
        .ok_or(ArchiveError::InvalidFormat)?;
    // `get` performs the bounds check and avoids any panic path.
    archive_data
        .get(offset..end)
        .map(|payload| payload.to_vec())
        .ok_or(ArchiveError::InvalidFormat)
}
/// Extracts every non-directory entry from an uncompressed tar archive,
/// returning `(name, contents)` pairs in name order. Fails on the first
/// entry whose payload cannot be extracted.
pub fn extract_all(archive_data: &[u8]) -> Result<Vec<(String, Vec<u8>)>, ArchiveError> {
    parse_tar(archive_data)?
        .into_iter()
        .filter(|(_, entry)| !entry.is_dir)
        .map(|(name, entry)| extract_file(archive_data, &entry).map(|data| (name, data)))
        .collect()
}
/// LZ4-decompresses the archive, then extracts every file it contains.
pub fn extract_all_lz4(archive_data: &[u8]) -> Result<Vec<(String, Vec<u8>)>, ArchiveError> {
    decompress_lz4(archive_data).and_then(|tar| extract_all(&tar))
}
/// Summarizes a compression result as `(ratio, bytes_saved)`.
///
/// The ratio is `original / compressed` (so bigger is better); a zero
/// compressed size yields a neutral ratio of 1.0 instead of dividing by
/// zero. `bytes_saved` saturates at 0 when the "compressed" form grew.
pub fn compression_stats(original_size: usize, compressed_size: usize) -> (f32, usize) {
    let ratio = match compressed_size {
        0 => 1.0,
        n => original_size as f32 / n as f32,
    };
    (ratio, original_size.saturating_sub(compressed_size))
}
#[cfg(test)]
mod tests {
    use super::*;

    // Text fields are NUL-terminated; parsing must stop at the first NUL.
    #[test]
    fn test_parse_string() {
        let field = b"hello\0\0\0\0\0";
        assert_eq!(parse_string(field), "hello");
    }

    // Octal fields may carry trailing NUL/space terminators.
    #[test]
    fn test_parse_octal() {
        let field = b"000644\0 ";
        assert_eq!(parse_octal(field), 0o644);
    }

    // End-to-end: build an archive in memory, then index it back.
    #[test]
    fn test_create_and_parse_tar() {
        let files = [
            ("hello.txt", b"Hello, World!".as_slice()),
            ("data.bin", &[0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        ];
        let archive = create_tar(&files).unwrap();
        let entries = parse_tar(&archive).unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key("hello.txt"));
        assert!(entries.contains_key("data.bin"));
        let hello_entry = entries.get("hello.txt").unwrap();
        assert_eq!(hello_entry.size, 13);
    }

    // The extracted payload must be byte-identical to what went in.
    #[test]
    fn test_extract_file() {
        let original_data = b"This is test content for extraction!";
        let files = [("test.txt", original_data.as_slice())];
        let archive = create_tar(&files).unwrap();
        let entries = parse_tar(&archive).unwrap();
        let entry = entries.get("test.txt").unwrap();
        let extracted = extract_file(&archive, entry).unwrap();
        assert_eq!(extracted, original_data);
    }

    // Highly repetitive data must shrink under LZ4 and survive a full
    // compress/extract round trip.
    #[test]
    fn test_tar_lz4_roundtrip() {
        let mut data = Vec::new();
        for _ in 0..100 {
            data.extend_from_slice(b"AAAAAAAAAA");
        }
        let files = [("compressible.txt", data.as_slice())];
        let compressed = create_tar_lz4(&files).unwrap();
        let uncompressed = create_tar(&files).unwrap();
        assert!(compressed.len() < uncompressed.len());
        let extracted = extract_all_lz4(&compressed).unwrap();
        assert_eq!(extracted.len(), 1);
        assert_eq!(extracted[0].0, "compressible.txt");
        assert_eq!(extracted[0].1, data);
    }

    // Serializing a header and parsing it back must preserve the
    // metadata fields callers rely on.
    #[test]
    fn test_header_roundtrip() {
        let header = TarHeader {
            name: "test/file.txt".to_string(),
            mode: 0o755,
            uid: 1000,
            gid: 1000,
            size: 12345,
            mtime: 1234567890,
            typeflag: TarType::RegularFile,
            linkname: String::new(),
            uname: "user".to_string(),
            gname: "group".to_string(),
            devmajor: 0,
            devminor: 0,
            prefix: String::new(),
        };
        let bytes = header.to_bytes();
        let parsed = TarHeader::from_bytes(&bytes).unwrap();
        assert_eq!(parsed.name, header.name);
        assert_eq!(parsed.mode, header.mode);
        assert_eq!(parsed.size, header.size);
        assert_eq!(parsed.mtime, header.mtime);
    }

    // Ratio is original/compressed; saved is the byte difference.
    #[test]
    fn test_compression_stats() {
        let (ratio, saved) = compression_stats(1000, 200);
        assert!((ratio - 5.0).abs() < 0.01);
        assert_eq!(saved, 800);
    }
}