asar-rust 0.1.0

Rust port of @electron/asar — create and extract Electron ASAR archives
Documentation
use crate::filesystem::{FileEntry, Filesystem, FilesystemEntry};
use crate::integrity::FileIntegrity;
use crate::path_validation::ensure_within;
use crate::pickle::Pickle;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::io::{Read, Seek, Write};
use std::path::{Path, PathBuf};
use std::sync::{Arc, LazyLock, RwLock};

/// Process-wide cache of parsed archives, keyed by the archive path exactly as
/// it was passed to `read_filesystem_sync` (the path is not normalised here).
///
/// Created lazily on first access. Entries are removed by `uncache_filesystem`
/// and `uncache_all`.
static FILESYSTEM_CACHE: LazyLock<RwLock<HashMap<PathBuf, Arc<Filesystem>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));

use thiserror::Error;

/// Errors that can occur during ASAR archive operations.
///
/// The `Display` text of every variant is taken from its `#[error(...)]`
/// attribute.
#[derive(Error, Debug)]
pub enum AsarError {
    /// An underlying I/O failure. `#[from]` lets `?` convert a
    /// `std::io::Error` into this variant automatically.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// The archive header was rejected; the payload describes why.
    #[error("Invalid archive header: {0}")]
    HeaderValidation(String),
    /// Something that was looked up does not exist. The payload is the whole
    /// user-facing message (presumably naming the missing entry — confirm
    /// against the code that constructs it).
    #[error("{0}")]
    NotFound(String),
    /// A path would escape the directory it is required to stay within.
    #[error("Path traversal: {0}")]
    PathTraversal(String),
    /// A chain of symbolic links loops back on itself.
    #[error("Circular symlink detected: {0}")]
    CircularSymlink(String),
    /// A chain of symbolic links is too long to follow.
    #[error("Too many levels of symbolic links")]
    SymlinkDepth,
    /// The file at `path` is too large to be stored; the message cites a
    /// 4.2GB limit.
    #[error("{path}: file size can not be larger than 4.2GB")]
    FileTooLarge { path: String },
    /// Any other failure; the payload is the whole user-facing message.
    #[error("{0}")]
    Other(String),
}

/// Integrity metadata stored in the ASAR header for a file.
///
/// This is the serialised (JSON) counterpart of `FileIntegrity`; the
/// conversions in this file copy the four fields across unchanged.
#[derive(Debug, Serialize, Deserialize)]
pub struct HeaderIntegrity {
    /// Name of the hash algorithm used for `hash` and `blocks`.
    pub algorithm: String,
    /// Hash of the file (presumably of the whole content, hex-encoded —
    /// confirm against the integrity module).
    pub hash: String,
    /// Block size in bytes; appears as `blockSize` in the JSON header.
    #[serde(rename = "blockSize")]
    pub block_size: usize,
    /// Per-block hashes (presumably one per `block_size` chunk, in file
    /// order — confirm against the integrity module).
    pub blocks: Vec<String>,
}

/// A node in the ASAR archive header tree.
///
/// Represents a file, directory, or symbolic link entry.
///
/// The enum is `#[serde(untagged)]`: when deserialising, serde tries the
/// variants in declaration order and keeps the first one that fits, so the
/// order of the variants below is significant and must not be changed.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum HeaderEntry {
    /// A regular file.
    File {
        /// Position of the file's bytes, as a decimal string counted from the
        /// end of the header. Absent for unpacked files, which live outside
        /// the archive; like the other optional fields it is omitted from the
        /// JSON when `None` rather than being written as `null`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        offset: Option<String>,
        /// Size of the file in bytes.
        size: u64,
        /// `Some(true)` when the file is executable; omitted otherwise.
        #[serde(skip_serializing_if = "Option::is_none")]
        executable: Option<bool>,
        /// `Some(true)` when the file is stored in the `.unpacked` directory
        /// next to the archive; omitted otherwise.
        #[serde(skip_serializing_if = "Option::is_none")]
        unpacked: Option<bool>,
        /// Integrity metadata for the file, when available.
        #[serde(skip_serializing_if = "Option::is_none")]
        integrity: Option<HeaderIntegrity>,
    },
    /// A directory and its children.
    Directory {
        /// Child entries keyed by name; insertion order is preserved.
        files: IndexMap<String, HeaderEntry>,
        /// `Some(true)` when the directory is marked as unpacked.
        #[serde(skip_serializing_if = "Option::is_none")]
        unpacked: Option<bool>,
    },
    /// A symbolic link.
    Link {
        /// Target of the link.
        link: String,
        /// `Some(true)` when the link is marked as unpacked.
        #[serde(skip_serializing_if = "Option::is_none")]
        unpacked: Option<bool>,
    },
}

/// The parsed header of an ASAR archive.
#[derive(Debug)]
pub struct ArchiveHeader {
    /// Root of the parsed header tree.
    pub header: HeaderEntry,
    /// The header JSON text exactly as it was read from the archive.
    pub header_string: String,
    /// Size in bytes of the header pickle, as recorded in the archive's
    /// 8-byte size prefix.
    pub header_size: u32,
}

/// Reads and parses the header of the archive at `archive_path`.
///
/// The archive starts with an 8-byte pickle holding the header size, followed
/// by the header pickle itself, whose payload is the JSON header text.
///
/// # Errors
///
/// * [`AsarError::Io`] when the file cannot be opened or is shorter than the
///   sizes it declares.
/// * [`AsarError::HeaderValidation`] when the declared header size does not
///   fit in the archive, the JSON cannot be parsed, or the root entry is not
///   a directory.
pub fn read_archive_header_sync(archive_path: &Path) -> Result<ArchiveHeader, AsarError> {
    let mut archive = fs::File::open(archive_path)?;
    let archive_len = archive.metadata()?.len();

    // Fixed-size prefix: a pickle wrapping a single u32, the header size.
    let mut prefix = [0u8; 8];
    archive.read_exact(&mut prefix)?;
    let prefix_pickle = Pickle::from_buffer(&prefix);
    let mut prefix_reader = prefix_pickle.iter();
    let header_size = prefix_reader.read_u32();

    // Reject an impossible size before allocating a buffer for it.
    let bytes_after_prefix = archive_len.saturating_sub(8);
    if bytes_after_prefix < u64::from(header_size) {
        let message = format!(
            "Header size {} exceeds archive size {}. The archive is corrupted.",
            header_size, archive_len
        );
        return Err(AsarError::HeaderValidation(message));
    }

    let mut raw_header = vec![0u8; header_size as usize];
    archive.read_exact(&mut raw_header)?;

    let body_pickle = Pickle::from_buffer(&raw_header);
    let mut body_reader = body_pickle.iter();
    let header_string = body_reader.read_string();

    let header = serde_json::from_str::<HeaderEntry>(&header_string)
        .map_err(|err| AsarError::HeaderValidation(err.to_string()))?;
    validate_header(&header)?;

    Ok(ArchiveHeader {
        header_size,
        header_string,
        header,
    })
}

/// Checks that the root of a parsed header is a directory.
///
/// Returns [`AsarError::HeaderValidation`] for a file or link root.
fn validate_header(header: &HeaderEntry) -> Result<(), AsarError> {
    if matches!(header, HeaderEntry::Directory { .. }) {
        return Ok(());
    }
    Err(AsarError::HeaderValidation(
        "root header must be a directory".to_string(),
    ))
}

impl TryFrom<&HeaderEntry> for FilesystemEntry {
    type Error = AsarError;
    fn try_from(entry: &HeaderEntry) -> Result<Self, AsarError> {
        match entry {
            HeaderEntry::File {
                offset,
                size,
                executable,
                unpacked,
                integrity,
            } => {
                let integ = integrity.as_ref().map(|i| FileIntegrity {
                    algorithm: i.algorithm.clone(),
                    hash: i.hash.clone(),
                    block_size: i.block_size,
                    blocks: i.blocks.clone(),
                });
                Ok(FilesystemEntry::File(FileEntry {
                    offset: offset.clone().unwrap_or_else(|| "0".to_string()),
                    size: *size,
                    executable: executable.unwrap_or(false),
                    unpacked: unpacked.unwrap_or(false),
                    integrity: integ,
                }))
            }
            HeaderEntry::Directory { files, unpacked } => {
                let mut map = IndexMap::new();
                for (name, child) in files {
                    map.insert(name.clone(), FilesystemEntry::try_from(child)?);
                }
                Ok(FilesystemEntry::Directory(
                    crate::filesystem::DirectoryEntry {
                        files: map,
                        unpacked: unpacked.unwrap_or(false),
                    },
                ))
            }
            HeaderEntry::Link { link, unpacked } => {
                Ok(FilesystemEntry::Link(crate::filesystem::LinkEntry {
                    link: link.clone(),
                    unpacked: unpacked.unwrap_or(false),
                }))
            }
        }
    }
}

/// Returns the parsed filesystem of the archive at `archive_path`.
///
/// A previously parsed archive is served from the module-level cache, which
/// is keyed by the path exactly as given. On a miss the header is read from
/// disk, converted, stored in the cache and returned.
///
/// # Errors
///
/// Propagates any error from reading or converting the archive header.
///
/// # Panics
///
/// Panics if the cache lock is poisoned.
pub fn read_filesystem_sync(archive_path: &Path) -> Result<Arc<Filesystem>, AsarError> {
    // The read guard is a temporary and is released at the end of this
    // statement, before any disk access or write locking happens.
    let cached = FILESYSTEM_CACHE.read().unwrap().get(archive_path).cloned();
    if let Some(hit) = cached {
        return Ok(hit);
    }

    let parsed = read_archive_header_sync(archive_path)?;
    let root = FilesystemEntry::try_from(&parsed.header)?;

    let key = archive_path.to_path_buf();
    let mut filesystem = Filesystem::new(&key);
    filesystem.set_header(root, parsed.header_size);
    let shared = Arc::new(filesystem);

    FILESYSTEM_CACHE
        .write()
        .unwrap()
        .insert(key, Arc::clone(&shared));
    Ok(shared)
}

/// Converts an in-memory filesystem node into its serialisable header form.
///
/// Flags that are `false` become `None`, and an unpacked file carries no
/// offset. Directories are converted recursively, keeping the order of their
/// children.
impl From<&FilesystemEntry> for HeaderEntry {
    fn from(entry: &FilesystemEntry) -> Self {
        match entry {
            FilesystemEntry::File(file) => {
                let integrity = file.integrity.as_ref().map(|source| HeaderIntegrity {
                    algorithm: source.algorithm.clone(),
                    hash: source.hash.clone(),
                    block_size: source.block_size,
                    blocks: source.blocks.clone(),
                });
                HeaderEntry::File {
                    offset: (!file.unpacked).then(|| file.offset.clone()),
                    size: file.size,
                    executable: file.executable.then_some(true),
                    unpacked: file.unpacked.then_some(true),
                    integrity,
                }
            }
            FilesystemEntry::Directory(dir) => HeaderEntry::Directory {
                files: dir
                    .files
                    .iter()
                    .map(|(name, child)| (name.clone(), HeaderEntry::from(child)))
                    .collect(),
                unpacked: dir.unpacked.then_some(true),
            },
            FilesystemEntry::Link(symlink) => HeaderEntry::Link {
                link: symlink.link.clone(),
                unpacked: symlink.unpacked.then_some(true),
            },
        }
    }
}

/// Writes `filesystem` and the contents of `files` as an ASAR archive at `dest`.
///
/// The archive consists of the size pickle, the header pickle, and then the
/// bytes of every entry of `files` whose flag is `false`, in slice order.
/// Entries whose flag is `true` are not appended; they are copied into the
/// sibling directory `<dest>.unpacked`, at their path relative to
/// `filesystem.root_path()`.
///
/// Missing parent directories of `dest` are created. `_metadata` is currently
/// unused.
///
/// # Errors
///
/// * [`AsarError::Io`] for any filesystem failure.
/// * [`AsarError::Other`] when the header cannot be serialised or is too large
///   for the 32-bit size prefix.
/// * [`AsarError::PathTraversal`] when an unpacked file is not below the
///   filesystem root and its path is rooted, so it cannot be placed inside the
///   `.unpacked` directory.
pub fn write_filesystem(
    dest: &Path,
    filesystem: &Filesystem,
    files: &[(PathBuf, bool)],
    _metadata: &HashMap<PathBuf, crate::crawlfs::FileMetadata>,
) -> Result<(), AsarError> {
    if let Some(parent) = dest.parent() {
        fs::create_dir_all(parent)?;
    }

    let header_entry = HeaderEntry::from(filesystem.get_header());
    let header_json =
        serde_json::to_string(&header_entry).map_err(|e| AsarError::Other(e.to_string()))?;

    let mut header_pickle = Pickle::new();
    header_pickle.write_string(&header_json);
    let header_buf = header_pickle.into_buffer();

    // The size prefix is a u32. Fail before touching `dest` instead of
    // writing a truncated size that would make the archive unreadable.
    let header_len = u32::try_from(header_buf.len()).map_err(|_| {
        AsarError::Other(format!(
            "Header of {} bytes does not fit in the 32-bit size prefix",
            header_buf.len()
        ))
    })?;

    let mut size_pickle = Pickle::new();
    size_pickle.write_u32(header_len);
    let size_buf = size_pickle.into_buffer();

    let mut out = fs::File::create(dest)?;
    out.write_all(&size_buf)?;
    out.write_all(&header_buf)?;

    // Append the suffix to the raw OS string: going through `display()` would
    // replace any non-UTF-8 bytes of `dest` and point at a different directory.
    let mut unpacked_name = dest.as_os_str().to_os_string();
    unpacked_name.push(".unpacked");
    let unpacked_root = PathBuf::from(unpacked_name);

    for (filepath, unpack) in files {
        if *unpack {
            let relative = filepath
                .strip_prefix(filesystem.root_path())
                .unwrap_or(filepath);
            // `Path::join` discards its base when the joined path is rooted or
            // carries a prefix, so the "copy" would target the source file
            // itself, which is likely to truncate it.
            let escapes_base = relative.components().any(|part| {
                matches!(
                    part,
                    std::path::Component::Prefix(_) | std::path::Component::RootDir
                )
            });
            if escapes_base {
                return Err(AsarError::PathTraversal(format!(
                    "{} cannot be placed inside {}",
                    filepath.display(),
                    unpacked_root.display()
                )));
            }
            let target = unpacked_root.join(relative);
            if let Some(parent) = target.parent() {
                fs::create_dir_all(parent)?;
            }
            fs::copy(filepath, &target)?;
        } else {
            let mut source = fs::File::open(filepath)?;
            std::io::copy(&mut source, &mut out)?;
        }
    }

    Ok(())
}

pub fn read_file_sync(
    filesystem: &Filesystem,
    filename: &str,
    info: &FileEntry,
) -> Result<Vec<u8>, AsarError> {
    let mut file = fs::File::open(filesystem.root_path())?;
    read_file_with_fd(&mut file, filesystem, filename, info)
}

/// Reads the contents of one archive entry using an already opened archive.
///
/// * `file` is the opened archive; it is only used for packed entries.
/// * `filesystem` supplies the archive path and the header size.
/// * `filename` is the entry's path; it is only used for unpacked entries,
///   where it is resolved inside `<archive>.unpacked` through `ensure_within`.
/// * `info` is the entry's header metadata.
///
/// An entry of size zero yields an empty vector without any I/O.
///
/// # Errors
///
/// * [`AsarError::Other`] when the entry's offset is not a valid number, when
///   adding it to the header length overflows, or when the entry would extend
///   past the end of the archive.
/// * [`AsarError::Io`] for any read failure.
/// * Whatever `ensure_within` reports for an unpacked entry.
pub fn read_file_with_fd(
    file: &mut std::fs::File,
    filesystem: &Filesystem,
    filename: &str,
    info: &FileEntry,
) -> Result<Vec<u8>, AsarError> {
    if info.size == 0 {
        return Ok(Vec::new());
    }

    if info.unpacked {
        // Append the suffix to the raw OS string: going through `display()`
        // would replace any non-UTF-8 bytes of the archive path and point at
        // a different directory.
        let mut unpacked_name = filesystem.root_path().as_os_str().to_os_string();
        unpacked_name.push(".unpacked");
        let unpacked_dir = PathBuf::from(unpacked_name);
        return Ok(fs::read(ensure_within(unpacked_dir.as_path(), filename)?)?);
    }

    let file_offset: u64 = info
        .offset
        .parse()
        .map_err(|_| AsarError::Other(format!("Invalid offset: {}", info.offset)))?;

    // The offset comes from the archive header and is untrusted. A value near
    // u64::MAX must be rejected here: a wrapping sum would yield a small
    // absolute offset that slips through the bounds check below.
    let offset = (8 + filesystem.header_size() as u64)
        .checked_add(file_offset)
        .ok_or_else(|| AsarError::Other(format!("Invalid offset: {}", info.offset)))?;

    let archive_size = file.metadata()?.len();

    if offset
        .checked_add(info.size)
        .is_none_or(|end| end > archive_size)
    {
        return Err(AsarError::Other(format!(
            "File entry extends beyond archive boundary (offset={}, size={}, archiveSize={})",
            offset, info.size, archive_size
        )));
    }

    file.seek(std::io::SeekFrom::Start(offset))?;
    let size = usize::try_from(info.size).map_err(|_| AsarError::Other("size overflow".into()))?;
    let mut buffer = vec![0u8; size];
    file.read_exact(&mut buffer)?;
    Ok(buffer)
}

/// Drops the cached filesystem for `archive_path`, if there is one.
///
/// Returns `true` when an entry was removed and `false` when the path was not
/// cached. The path must match the one used when the archive was read.
///
/// # Panics
///
/// Panics if the cache lock is poisoned.
pub fn uncache_filesystem(archive_path: &Path) -> bool {
    FILESYSTEM_CACHE
        .write()
        .unwrap()
        .remove(archive_path)
        .is_some()
}

/// Empties the filesystem cache.
///
/// # Panics
///
/// Panics if the cache lock is poisoned.
pub fn uncache_all() {
    FILESYSTEM_CACHE.write().unwrap().clear();
}