use crate::filesystem::{FileEntry, Filesystem, FilesystemEntry};
use crate::integrity::FileIntegrity;
use crate::path_validation::ensure_within;
use crate::pickle::Pickle;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::io::{Read, Seek, Write};
use std::path::{Path, PathBuf};
use std::sync::{Arc, LazyLock, RwLock};
/// Process-wide cache of parsed archive filesystems, keyed by the archive path
/// handed to `read_filesystem_sync`. Entries are dropped again by
/// `uncache_filesystem` (single path) and `uncache_all` (everything).
static FILESYSTEM_CACHE: LazyLock<RwLock<HashMap<PathBuf, Arc<Filesystem>>>> =
LazyLock::new(|| RwLock::new(HashMap::new()));
use thiserror::Error;
/// Error type returned by the archive reading and writing functions in this module.
#[derive(Error, Debug)]
pub enum AsarError {
/// An underlying `std::io::Error`; `?` converts into this variant via `#[from]`.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// The archive header is malformed; the payload describes what was wrong.
#[error("Invalid archive header: {0}")]
HeaderValidation(String),
/// Displays the payload verbatim.
/// NOTE(review): presumably a missing archive entry or path — confirm at the call sites.
#[error("{0}")]
NotFound(String),
/// A path was rejected as a traversal attempt; the payload is appended to the message.
/// NOTE(review): presumably raised by the path-validation helpers — confirm.
#[error("Path traversal: {0}")]
PathTraversal(String),
/// A symlink cycle was detected; the payload is appended to the message.
#[error("Circular symlink detected: {0}")]
CircularSymlink(String),
/// Symlink resolution exceeded a nesting limit (the limit itself is defined elsewhere).
#[error("Too many levels of symbolic links")]
SymlinkDepth,
/// The file at `path` is too large to store; the message quotes a 4.2GB ceiling.
#[error("{path}: file size can not be larger than 4.2GB")]
FileTooLarge { path: String },
/// Catch-all error whose payload is displayed verbatim.
#[error("{0}")]
Other(String),
}
/// Integrity record attached to a file entry in the JSON archive header.
///
/// Field-for-field counterpart of `FileIntegrity`; the conversions in this
/// module copy every field across unchanged.
#[derive(Debug, Serialize, Deserialize)]
pub struct HeaderIntegrity {
/// Name of the hash algorithm (stored as free-form text).
pub algorithm: String,
/// NOTE(review): presumably the digest of the whole file — confirm against the integrity module.
pub hash: String,
/// Serialized under the JSON key `blockSize`.
#[serde(rename = "blockSize")]
pub block_size: usize,
/// NOTE(review): presumably one digest per `block_size`-sized block — confirm.
pub blocks: Vec<String>,
}
/// One node of the JSON archive header tree.
///
/// The enum is `#[serde(untagged)]`: on deserialization serde tries the
/// variants in declaration order and keeps the first one whose fields match,
/// so the variant order below is part of the behavior — do not reorder.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum HeaderEntry {
/// A regular file; recognized by its required `size` field.
File {
// Missing in the JSON => `None`.
// NOTE(review): unlike the other optional fields this one has no
// `skip_serializing_if`, so a `None` offset is written as `"offset":null`
// rather than omitted — confirm that is intended.
#[serde(default)]
offset: Option<String>,
size: u64,
#[serde(skip_serializing_if = "Option::is_none")]
executable: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
unpacked: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
integrity: Option<HeaderIntegrity>,
},
/// A directory; `files` maps child names to their entries and keeps insertion order.
Directory {
files: IndexMap<String, HeaderEntry>,
#[serde(skip_serializing_if = "Option::is_none")]
unpacked: Option<bool>,
},
/// A symbolic link; `link` holds the link target as stored in the header.
Link {
link: String,
#[serde(skip_serializing_if = "Option::is_none")]
unpacked: Option<bool>,
},
}
/// Result of `read_archive_header_sync`: the parsed header plus its raw form.
#[derive(Debug)]
pub struct ArchiveHeader {
/// Parsed header tree; the root is validated to be a directory.
pub header: HeaderEntry,
/// The JSON text exactly as read from the header pickle.
pub header_string: String,
/// Byte length of the header pickle, as declared by the 8-byte size prefix.
pub header_size: u32,
}
/// Reads and parses the header of the archive stored at `archive_path`.
///
/// The file begins with an 8-byte size pickle that declares the byte length
/// of the header pickle following it; the header pickle carries the JSON
/// header string.
///
/// # Errors
/// - `AsarError::Io` if the file cannot be opened or ends before the bytes
///   it declares have been read.
/// - `AsarError::HeaderValidation` if the declared header size is larger
///   than the file can hold, the JSON fails to parse, or the root entry is
///   not a directory.
pub fn read_archive_header_sync(archive_path: &Path) -> Result<ArchiveHeader, AsarError> {
    let mut archive = fs::File::open(archive_path)?;
    let archive_size = archive.metadata()?.len();

    let mut prefix = [0u8; 8];
    archive.read_exact(&mut prefix)?;
    let header_size = Pickle::from_buffer(&prefix).iter().read_u32();

    // Whatever follows the 8-byte prefix is the most the header could occupy.
    let available = archive_size.saturating_sub(8);
    if u64::from(header_size) > available {
        let message = format!(
            "Header size {} exceeds archive size {}. The archive is corrupted.",
            header_size, archive_size
        );
        return Err(AsarError::HeaderValidation(message));
    }

    let mut raw_header = vec![0u8; header_size as usize];
    archive.read_exact(&mut raw_header)?;
    let header_string = Pickle::from_buffer(&raw_header).iter().read_string();

    let header = match serde_json::from_str::<HeaderEntry>(&header_string) {
        Ok(parsed) => parsed,
        Err(e) => return Err(AsarError::HeaderValidation(e.to_string())),
    };
    validate_header(&header)?;

    Ok(ArchiveHeader {
        header,
        header_string,
        header_size,
    })
}
/// Checks that the root of a parsed header is a directory entry.
///
/// # Errors
/// Returns `AsarError::HeaderValidation` for any other kind of root entry.
fn validate_header(header: &HeaderEntry) -> Result<(), AsarError> {
    if let HeaderEntry::Directory { .. } = header {
        return Ok(());
    }
    let reason = String::from("root header must be a directory");
    Err(AsarError::HeaderValidation(reason))
}
impl TryFrom<&HeaderEntry> for FilesystemEntry {
type Error = AsarError;
fn try_from(entry: &HeaderEntry) -> Result<Self, AsarError> {
match entry {
HeaderEntry::File {
offset,
size,
executable,
unpacked,
integrity,
} => {
let integ = integrity.as_ref().map(|i| FileIntegrity {
algorithm: i.algorithm.clone(),
hash: i.hash.clone(),
block_size: i.block_size,
blocks: i.blocks.clone(),
});
Ok(FilesystemEntry::File(FileEntry {
offset: offset.clone().unwrap_or_else(|| "0".to_string()),
size: *size,
executable: executable.unwrap_or(false),
unpacked: unpacked.unwrap_or(false),
integrity: integ,
}))
}
HeaderEntry::Directory { files, unpacked } => {
let mut map = IndexMap::new();
for (name, child) in files {
map.insert(name.clone(), FilesystemEntry::try_from(child)?);
}
Ok(FilesystemEntry::Directory(
crate::filesystem::DirectoryEntry {
files: map,
unpacked: unpacked.unwrap_or(false),
},
))
}
HeaderEntry::Link { link, unpacked } => {
Ok(FilesystemEntry::Link(crate::filesystem::LinkEntry {
link: link.clone(),
unpacked: unpacked.unwrap_or(false),
}))
}
}
}
}
/// Returns the parsed filesystem for the archive at `archive_path`, using the
/// process-wide cache when the path has been read before.
///
/// On a cache miss the header is read from disk, converted into a
/// `Filesystem`, stored in the cache under `archive_path`, and returned.
///
/// # Errors
/// Propagates any error from reading or converting the archive header.
///
/// # Panics
/// Panics if the cache lock has been poisoned.
pub fn read_filesystem_sync(archive_path: &Path) -> Result<Arc<Filesystem>, AsarError> {
    // Fast path: the read guard is a temporary and is released at the end of
    // this statement, before any disk access happens.
    let cached = FILESYSTEM_CACHE
        .read()
        .unwrap()
        .get(archive_path)
        .map(Arc::clone);
    if let Some(hit) = cached {
        return Ok(hit);
    }

    let parsed = read_archive_header_sync(archive_path)?;
    let root = FilesystemEntry::try_from(&parsed.header)?;

    let key = archive_path.to_path_buf();
    let mut filesystem = Filesystem::new(&key);
    filesystem.set_header(root, parsed.header_size);

    let shared = Arc::new(filesystem);
    FILESYSTEM_CACHE
        .write()
        .unwrap()
        .insert(key, Arc::clone(&shared));
    Ok(shared)
}
/// Converts an in-memory filesystem node back into its JSON header form.
///
/// Boolean flags are only emitted when set (`true` becomes `Some(true)`,
/// `false` becomes `None`), and an unpacked file carries no `offset`.
/// Directories are converted recursively, keeping the child order.
impl From<&FilesystemEntry> for HeaderEntry {
    fn from(entry: &FilesystemEntry) -> Self {
        match entry {
            FilesystemEntry::File(file) => {
                let integrity = file.integrity.as_ref().map(|src| HeaderIntegrity {
                    algorithm: src.algorithm.clone(),
                    hash: src.hash.clone(),
                    block_size: src.block_size,
                    blocks: src.blocks.clone(),
                });
                HeaderEntry::File {
                    offset: (!file.unpacked).then(|| file.offset.clone()),
                    size: file.size,
                    executable: file.executable.then_some(true),
                    unpacked: file.unpacked.then_some(true),
                    integrity,
                }
            }
            FilesystemEntry::Directory(dir) => HeaderEntry::Directory {
                files: dir
                    .files
                    .iter()
                    .map(|(name, child)| (name.clone(), HeaderEntry::from(child)))
                    .collect(),
                unpacked: dir.unpacked.then_some(true),
            },
            FilesystemEntry::Link(symlink) => HeaderEntry::Link {
                link: symlink.link.clone(),
                unpacked: symlink.unpacked.then_some(true),
            },
        }
    }
}
/// Writes the archive described by `filesystem` to `dest`.
///
/// The output file consists of the size pickle, the header pickle (the JSON
/// form of `filesystem.get_header()`), and then the contents of every packed
/// file, in the order given by `files`. Each `(path, true)` entry in `files`
/// is instead copied into the sibling directory `<dest>.unpacked`, at its
/// path relative to `filesystem.root_path()`.
///
/// `_metadata` is accepted for interface compatibility and is not used here.
///
/// # Errors
/// - `AsarError::Io` for any filesystem failure.
/// - `AsarError::Other` if the header cannot be serialized or its pickle is
///   larger than a `u32` can describe.
/// - `AsarError::PathTraversal` if an unpacked file lies outside the root and
///   its path is absolute, so it cannot be placed under `<dest>.unpacked`.
pub fn write_filesystem(
    dest: &Path,
    filesystem: &Filesystem,
    files: &[(PathBuf, bool)],
    _metadata: &HashMap<PathBuf, crate::crawlfs::FileMetadata>,
) -> Result<(), AsarError> {
    if let Some(parent) = dest.parent() {
        fs::create_dir_all(parent)?;
    }

    let header_entry = HeaderEntry::from(filesystem.get_header());
    let header_json =
        serde_json::to_string(&header_entry).map_err(|e| AsarError::Other(e.to_string()))?;
    let mut header_pickle = Pickle::new();
    header_pickle.write_string(&header_json);
    let header_buf = header_pickle.into_buffer();

    // The size prefix is a u32; refuse to write a header whose length would
    // be silently truncated (and therefore unreadable) instead of casting.
    let header_len = u32::try_from(header_buf.len()).map_err(|_| {
        AsarError::Other(format!(
            "Archive header is too large: {} bytes",
            header_buf.len()
        ))
    })?;
    let mut size_pickle = Pickle::new();
    size_pickle.write_u32(header_len);
    let size_buf = size_pickle.into_buffer();

    let mut out = fs::File::create(dest)?;
    out.write_all(&size_buf)?;
    out.write_all(&header_buf)?;

    // Append the suffix on the OS string: `Path::display()` is lossy and
    // would produce a different path for non-UTF-8 destinations.
    let mut unpacked_base = dest.as_os_str().to_os_string();
    unpacked_base.push(".unpacked");
    let unpacked_path = PathBuf::from(unpacked_base);

    for (filepath, unpack) in files {
        if *unpack {
            let relative = filepath
                .strip_prefix(filesystem.root_path())
                .unwrap_or(filepath);
            // `Path::join` discards the base when the argument is rooted, so a
            // rooted fallback would make `target` equal to `filepath` and the
            // copy below would overwrite (truncate) the source file itself.
            let escapes_base = matches!(
                relative.components().next(),
                Some(std::path::Component::Prefix(_) | std::path::Component::RootDir)
            );
            if escapes_base {
                return Err(AsarError::PathTraversal(format!(
                    "{} is not inside {}",
                    filepath.display(),
                    filesystem.root_path().display()
                )));
            }
            let target = unpacked_path.join(relative);
            if let Some(parent) = target.parent() {
                fs::create_dir_all(parent)?;
            }
            fs::copy(filepath, &target)?;
        } else {
            let mut source = fs::File::open(filepath)?;
            std::io::copy(&mut source, &mut out)?;
        }
    }
    Ok(())
}
pub fn read_file_sync(
filesystem: &Filesystem,
filename: &str,
info: &FileEntry,
) -> Result<Vec<u8>, AsarError> {
let mut file = fs::File::open(filesystem.root_path())?;
read_file_with_fd(&mut file, filesystem, filename, info)
}
/// Reads the contents of one archive entry through an already opened archive.
///
/// - Entries with `size == 0` yield an empty buffer without touching the file.
/// - Unpacked entries are read from `<root_path>.unpacked/<filename>`, with
///   the path checked by `ensure_within`.
/// - Packed entries are read from `file` at `8 + header_size + info.offset`.
///
/// # Errors
/// - `AsarError::Other` if `info.offset` is not a valid number, if the
///   absolute offset does not fit in a `u64`, if the entry would extend past
///   the end of the archive, or if its size does not fit in `usize`.
/// - `AsarError::Io` for read or seek failures.
/// - Whatever `ensure_within` reports for an unpacked entry.
pub fn read_file_with_fd(
    file: &mut std::fs::File,
    filesystem: &Filesystem,
    filename: &str,
    info: &FileEntry,
) -> Result<Vec<u8>, AsarError> {
    if info.size == 0 {
        return Ok(Vec::new());
    }

    if info.unpacked {
        // Append the suffix on the OS string: `Path::display()` is lossy and
        // would point at a different directory for non-UTF-8 archive paths.
        let mut unpacked_dir = filesystem.root_path().as_os_str().to_os_string();
        unpacked_dir.push(".unpacked");
        return Ok(fs::read(ensure_within(
            Path::new(&unpacked_dir),
            filename,
        )?)?);
    }

    let invalid_offset = || AsarError::Other(format!("Invalid offset: {}", info.offset));
    let file_offset: u64 = info.offset.parse().map_err(|_| invalid_offset())?;

    // The offset comes from the (untrusted) header. An unchecked sum would
    // panic in debug builds and wrap in release builds, where a wrapped value
    // could slip past the boundary check below.
    let offset = 8u64
        .checked_add(filesystem.header_size() as u64)
        .and_then(|data_start| data_start.checked_add(file_offset))
        .ok_or_else(invalid_offset)?;

    let archive_size = file.metadata()?.len();
    if offset
        .checked_add(info.size)
        .is_none_or(|end| end > archive_size)
    {
        return Err(AsarError::Other(format!(
            "File entry extends beyond archive boundary (offset={}, size={}, archiveSize={})",
            offset, info.size, archive_size
        )));
    }

    file.seek(std::io::SeekFrom::Start(offset))?;
    let size = usize::try_from(info.size).map_err(|_| AsarError::Other("size overflow".into()))?;
    let mut buffer = vec![0u8; size];
    file.read_exact(&mut buffer)?;
    Ok(buffer)
}
/// Removes the cached filesystem for `archive_path`, if there is one.
///
/// Returns `true` when an entry was present and has been removed.
///
/// # Panics
/// Panics if the cache lock has been poisoned.
pub fn uncache_filesystem(archive_path: &Path) -> bool {
    let removed = FILESYSTEM_CACHE.write().unwrap().remove(archive_path);
    removed.is_some()
}
/// Empties the filesystem cache.
///
/// # Panics
/// Panics if the cache lock has been poisoned.
pub fn uncache_all() {
    FILESYSTEM_CACHE.write().unwrap().clear();
}