use std::fs::{self, File};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use anyhow::{Context, Result, anyhow, bail};
use filetime::FileTime;
use glob::Pattern;
use tracing::warn;
use crate::filter::PathFilter;
use crate::format::toc::{EntryType, TocMember};
use crate::reader::TarzanReader;
/// Settings that control how `TarzanReader::extract_to_dir` selects and
/// writes archive members.
#[derive(Debug, Clone)]
pub struct ExtractOptions {
    /// Number of leading path components dropped from every member path
    /// before it is joined onto the destination. Members with no component
    /// left after stripping are not extracted.
    pub strip_components: usize,
    /// Glob patterns; a member whose path matches any of them is skipped.
    pub excludes: Vec<String>,
    /// Patterns handed to `PathFilter`; members it does not match are skipped.
    pub includes: Vec<String>,
    /// When `true`, the access and modification times recorded in the archive
    /// are applied to extracted files, symlinks and directories.
    pub restore_mtime: bool,
    /// When `true`, a file member whose data cannot be extracted is removed
    /// and skipped with a warning instead of aborting the whole extraction.
    pub skip_bad_chunks: bool,
}
impl Default for ExtractOptions {
fn default() -> Self {
Self {
strip_components: 0,
excludes: Vec::new(),
includes: Vec::new(),
restore_mtime: true,
skip_bad_chunks: false,
}
}
}
/// Filesystem work that is postponed until every member has been written.
#[derive(Default)]
struct Deferred {
    /// `(directory, mode)` pairs for directories whose archived mode lacks an
    /// owner permission bit. Such directories stay owner-accessible while
    /// extraction is running and receive their exact mode at the very end.
    dir_modes: Vec<(PathBuf, u32)>,
    /// `(directory, atime, mtime)` triples, applied after all members and
    /// hard links exist so that writing into a directory afterwards cannot
    /// disturb its restored timestamps.
    dir_times: Vec<(PathBuf, FileTime, FileTime)>,
    /// `(member path, link source, link location)` triples; hard links are
    /// created after the main pass, once their sources had the chance to be
    /// extracted.
    hard_links: Vec<(String, PathBuf, PathBuf)>,
}

impl TarzanReader {
    /// Extracts the archive's members into the directory `dest`.
    ///
    /// A member is extracted when the filter built from `opts.includes`
    /// matches its path and no pattern in `opts.excludes` does. The first
    /// `opts.strip_components` components of its path are dropped; members
    /// left with an empty path are skipped.
    ///
    /// `on_extracted` is called with the member's archive path every time a
    /// selected member has been processed without error. That includes
    /// members that were only warned about and skipped (unsupported entry
    /// types, unreadable data with `skip_bad_chunks`) and hard links whose
    /// creation is still pending.
    ///
    /// After the main pass, in this order: hard links are created (a link
    /// whose source does not exist is skipped with a warning), directory
    /// timestamps are applied, and finally the exact modes of directories
    /// that had to stay owner-accessible during extraction are applied.
    ///
    /// # Errors
    ///
    /// Fails on an invalid include or exclude pattern, on a member path that
    /// is absolute or contains `..`, on any filesystem error, and on a member
    /// data error unless `opts.skip_bad_chunks` is set.
    pub fn extract_to_dir<F>(
        &mut self,
        dest: &Path,
        opts: &ExtractOptions,
        mut on_extracted: F,
    ) -> Result<()>
    where
        F: FnMut(&str),
    {
        let includes = PathFilter::new(&opts.includes).context("invalid include/filter pattern")?;
        let excludes = compile_patterns(&opts.excludes).context("invalid exclude pattern")?;
        fs::create_dir_all(dest)
            .with_context(|| format!("creating destination {}", dest.display()))?;

        let mut deferred = Deferred::default();
        // The member list is copied so that `self` can be borrowed mutably by
        // `extract_one` while the list is being walked.
        let members = self.members().to_vec();
        for member in &members {
            if !includes.matches(&member.path) {
                continue;
            }
            if member_excluded(&member.path, &excludes) {
                continue;
            }
            let rel = match member_relative_path(member, opts.strip_components)? {
                Some(p) if !p.as_os_str().is_empty() => p,
                _ => continue,
            };
            let target = dest.join(&rel);
            self.extract_one(member, &target, dest, opts, &mut deferred)?;
            on_extracted(&member.path);
        }

        for (member_path, source, target) in deferred.hard_links {
            if let Some(parent) = target.parent() {
                fs::create_dir_all(parent)
                    .with_context(|| format!("creating {}", parent.display()))?;
            }
            if !source.exists() {
                warn!(
                    path = %member_path,
                    source = %source.display(),
                    "hard-link target was not extracted; skipping"
                );
                continue;
            }
            // Best effort: clear whatever already occupies the link location.
            // If that fails, `hard_link` below reports the real problem.
            let _ = fs::remove_file(&target);
            fs::hard_link(&source, &target).with_context(|| {
                format!(
                    "creating hard link {} -> {}",
                    target.display(),
                    source.display()
                )
            })?;
        }

        for (path, atime, mtime) in deferred.dir_times {
            filetime::set_file_times(&path, atime, mtime)
                .with_context(|| format!("setting file times on directory {}", path.display()))?;
        }

        // Restrictive directory modes go last, deepest directories first:
        // changing the mode of a nested directory needs search permission on
        // its ancestors, which a restrictive ancestor mode may take away.
        // The sort is stable, so repeated entries for one directory keep
        // their archive order and the last one wins.
        let mut dir_modes = deferred.dir_modes;
        dir_modes.sort_by_key(|(path, _)| std::cmp::Reverse(path.components().count()));
        for (path, mode) in dir_modes {
            set_unix_mode(&path, mode)?;
        }
        Ok(())
    }

    /// Materialises a single archive member at `target`.
    ///
    /// * Directories are created and get their xattrs immediately. Their
    ///   mode is applied with owner rwx forced on; when that differs from
    ///   the archived mode, the exact mode is queued in `deferred`.
    ///   Timestamps are queued when `opts.restore_mtime` is set.
    /// * Regular files are written through a buffered writer, then get mode,
    ///   xattrs and (optionally) timestamps. With `opts.skip_bad_chunks`, a
    ///   data error removes the partial file and only logs a warning.
    /// * Symlinks are created immediately; hard links are queued in
    ///   `deferred` with their source resolved relative to `dest`.
    /// * Device nodes, FIFOs and unknown entry types are skipped with a
    ///   warning.
    ///
    /// # Errors
    ///
    /// Returns any filesystem error, any link-target normalisation error,
    /// and member data errors when `opts.skip_bad_chunks` is not set.
    fn extract_one(
        &mut self,
        member: &TocMember,
        target: &Path,
        dest: &Path,
        opts: &ExtractOptions,
        deferred: &mut Deferred,
    ) -> Result<()> {
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).with_context(|| format!("creating {}", parent.display()))?;
        }
        let mtime = member_mtime(member);
        let atime = member_atime(member, mtime);
        match member.entry_type {
            EntryType::Dir => {
                fs::create_dir_all(target)
                    .with_context(|| format!("creating dir {}", target.display()))?;
                // Keep owner rwx while extraction is running. A directory
                // archived as e.g. 0o555 would otherwise reject the creation
                // of its own children and of hard links placed inside it.
                let working_mode = member.mode | 0o700;
                set_unix_mode(target, working_mode)?;
                apply_member_xattrs(target, member)?;
                if working_mode != member.mode {
                    deferred
                        .dir_modes
                        .push((target.to_path_buf(), member.mode));
                }
                if opts.restore_mtime {
                    deferred
                        .dir_times
                        .push((target.to_path_buf(), atime, mtime));
                }
            }
            EntryType::File => {
                // NOTE(review): `File::create` follows a symlink that already
                // exists at `target` or in its parents, including one created
                // by an earlier member of the same archive. Confirm whether
                // archives are trusted or whether a guard is needed here.
                let file = File::create(target)
                    .with_context(|| format!("creating file {}", target.display()))?;
                let mut writer = BufWriter::new(file);
                match self.extract_member(&member.path, &mut writer) {
                    Ok(()) => {
                        // Flush explicitly: dropping a `BufWriter` swallows
                        // write errors.
                        writer
                            .flush()
                            .with_context(|| format!("writing file {}", target.display()))?;
                        set_unix_mode(target, member.mode)?;
                        apply_member_xattrs(target, member)?;
                        if opts.restore_mtime {
                            filetime::set_file_times(target, atime, mtime).with_context(|| {
                                format!("setting file times on {}", target.display())
                            })?;
                        }
                    }
                    Err(err) if opts.skip_bad_chunks => {
                        // Close the handle before removing the partial file.
                        drop(writer);
                        let _ = fs::remove_file(target);
                        warn!(
                            path = %member.path,
                            error = format!("{err:#}"),
                            "skipping member with unreadable data (--skip-bad-chunks)"
                        );
                    }
                    Err(err) => return Err(err),
                }
            }
            EntryType::Symlink => {
                create_member_symlink(member, target)?;
                if opts.restore_mtime {
                    filetime::set_symlink_file_times(target, atime, mtime).with_context(|| {
                        format!("setting mtime on symlink {}", target.display())
                    })?;
                }
            }
            EntryType::HardLink => {
                // The link source gets the same stripping as member paths so
                // that it points at the extracted copy under `dest`.
                match member_link_target_relative_path(member, opts.strip_components)? {
                    Some(src_rel) if !src_rel.as_os_str().is_empty() => {
                        deferred.hard_links.push((
                            member.path.clone(),
                            dest.join(src_rel),
                            target.to_path_buf(),
                        ));
                    }
                    _ => warn!(
                        path = %member.path,
                        "hard-link target stripped away; skipping"
                    ),
                }
            }
            EntryType::CharDevice | EntryType::BlockDevice | EntryType::Fifo | EntryType::Other => {
                if matches!(member.entry_type, EntryType::Other)
                    && let Some(raw) = member.raw_type_byte
                {
                    warn!(
                        path = %member.path,
                        raw_type = format!("{} (0x{raw:02x})", raw as char),
                        "skipping unsupported entry type"
                    );
                } else {
                    warn!(path = %member.path, "skipping unsupported entry type");
                }
            }
        }
        Ok(())
    }
}
/// Compiles every raw exclude pattern into a glob `Pattern`.
///
/// Each pattern is passed through `normalize_for_match` first, so it is
/// compared in the same form as the member paths it is tested against.
///
/// # Errors
///
/// Stops at the first pattern that fails to compile and reports it together
/// with the underlying glob error.
fn compile_patterns(raw: &[String]) -> Result<Vec<Pattern>> {
    let mut compiled = Vec::with_capacity(raw.len());
    for s in raw {
        match Pattern::new(normalize_for_match(s)) {
            Ok(pattern) => compiled.push(pattern),
            Err(e) => return Err(anyhow!("invalid pattern `{s}`: {e}")),
        }
    }
    Ok(compiled)
}
/// Strips every leading `./` and every trailing `/` from `s`.
///
/// Patterns and member paths are both put through this so they are compared
/// in the same form.
fn normalize_for_match(s: &str) -> &str {
    let mut rest = s;
    while let Some(without_prefix) = rest.strip_prefix("./") {
        rest = without_prefix;
    }
    while let Some(without_slash) = rest.strip_suffix('/') {
        rest = without_slash;
    }
    rest
}
/// Reports whether the normalised member `path` matches at least one of the
/// compiled exclude patterns.
fn member_excluded(path: &str, compiled: &[Pattern]) -> bool {
    let candidate = normalize_for_match(path);
    for glob in compiled {
        if glob.matches(candidate) {
            return true;
        }
    }
    false
}
/// Computes the destination-relative path of `member` after removing `strip`
/// leading components.
///
/// On Unix the raw `path_bytes` are used when the member carries them;
/// otherwise, and on every other platform, the textual `path` is used.
/// Returns `Ok(None)` when stripping leaves nothing, and an error for
/// absolute paths or paths containing `..`.
fn member_relative_path(member: &TocMember, strip: usize) -> Result<Option<PathBuf>> {
    #[cfg(unix)]
    {
        if let Some(bytes) = &member.path_bytes {
            return normalize_member_path_bytes(bytes, strip);
        }
    }
    normalize_member_path(&member.path, strip)
}
/// Computes the destination-relative path of the entry a hard-link member
/// points at, after removing `strip` leading components.
///
/// On Unix the raw `link_target_bytes` are used when present; otherwise the
/// textual `link_target` is used.
///
/// # Errors
///
/// Fails when the member has no link target at all, or when the target is
/// absolute or contains `..`.
fn member_link_target_relative_path(member: &TocMember, strip: usize) -> Result<Option<PathBuf>> {
    #[cfg(unix)]
    {
        if let Some(bytes) = &member.link_target_bytes {
            return normalize_member_path_bytes(bytes, strip);
        }
    }
    let Some(link_target) = member.link_target.as_deref() else {
        return Err(anyhow!("hard link {} has no link_target", member.path));
    };
    normalize_member_path(link_target, strip)
}
/// Builds the member's modification time from its `mtime` seconds and
/// `mtime_ns` nanoseconds, using 0 nanoseconds when none are recorded.
fn member_mtime(member: &TocMember) -> FileTime {
    let seconds = member.mtime;
    let nanos = member.mtime_ns.unwrap_or(0);
    FileTime::from_unix_time(seconds, nanos)
}
/// Builds the member's access time from `atime` and `atime_ns` (0 when no
/// nanoseconds are recorded), or returns `fallback` when the member records
/// no access time.
fn member_atime(member: &TocMember, fallback: FileTime) -> FileTime {
    let Some(sec) = member.atime else {
        return fallback;
    };
    FileTime::from_unix_time(sec, member.atime_ns.unwrap_or(0))
}
/// Writes every extended attribute recorded for `member` onto `target`.
///
/// Does nothing when the member carries no xattrs.
///
/// # Errors
///
/// Stops at the first attribute that cannot be set.
#[cfg(unix)]
fn apply_member_xattrs(target: &Path, member: &TocMember) -> Result<()> {
    let Some(xattrs) = &member.xattrs else {
        return Ok(());
    };
    for (name, value) in xattrs {
        xattr::set(target, name, value)
            .with_context(|| format!("setting xattr {name} on {}", target.display()))?;
    }
    Ok(())
}

/// Non-Unix stand-in: extended attributes are not restored, and the call
/// always succeeds.
#[cfg(not(unix))]
fn apply_member_xattrs(_target: &Path, _member: &TocMember) -> Result<()> {
    Ok(())
}
/// Turns an archive path into a safe relative path with `strip` leading
/// components removed.
///
/// Empty and `.` components are dropped. Returns `Ok(None)` when `strip`
/// consumes every remaining component.
///
/// # Errors
///
/// Refuses paths that start with `/` and paths that contain a `..`
/// component.
///
/// NOTE(review): only `/` is treated as a separator. Confirm whether
/// components containing `\` or a drive prefix need rejecting on non-Unix
/// targets.
fn normalize_member_path(p: &str, strip: usize) -> Result<Option<PathBuf>> {
    if p.starts_with('/') {
        bail!("absolute path in archive (refusing to extract): {p}");
    }
    let mut kept: Vec<&str> = Vec::new();
    for segment in p.split('/').filter(|s| !matches!(*s, "" | ".")) {
        if segment == ".." {
            bail!("path contains `..` (refusing to extract): {p}");
        }
        kept.push(segment);
    }
    match kept.get(strip..) {
        Some(rest) if !rest.is_empty() => Ok(Some(rest.iter().copied().collect())),
        _ => Ok(None),
    }
}
/// Byte-oriented counterpart of `normalize_member_path` for raw archive
/// paths on Unix.
///
/// Empty and `.` components are dropped and `strip` leading components are
/// removed. Returns `Ok(None)` when nothing is left.
///
/// # Errors
///
/// Refuses paths that start with `/` and paths that contain a `..`
/// component.
#[cfg(unix)]
fn normalize_member_path_bytes(raw: &[u8], strip: usize) -> Result<Option<PathBuf>> {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    if raw.first() == Some(&b'/') {
        bail!("absolute path in archive (refusing to extract)");
    }
    let mut kept: Vec<&OsStr> = Vec::new();
    for segment in raw.split(|&byte| byte == b'/') {
        match segment {
            [] | [b'.'] => {}
            [b'.', b'.'] => bail!("path contains `..` (refusing to extract)"),
            other => kept.push(OsStr::from_bytes(other)),
        }
    }
    if kept.len() <= strip {
        return Ok(None);
    }
    Ok(Some(kept[strip..].iter().copied().collect()))
}
/// Applies the permission bits of `mode` to `target`.
///
/// Only the low twelve bits (`0o7777`) are used; any other bits in `mode`
/// are masked off.
///
/// # Errors
///
/// Fails when the permissions cannot be changed.
#[cfg(unix)]
fn set_unix_mode(target: &Path, mode: u32) -> Result<()> {
    use std::os::unix::fs::PermissionsExt;

    let permission_bits = mode & 0o7777;
    fs::set_permissions(target, fs::Permissions::from_mode(permission_bits))
        .with_context(|| format!("setting mode on {}", target.display()))
}

/// Non-Unix stand-in: modes are not restored, and the call always succeeds.
#[cfg(not(unix))]
fn set_unix_mode(_target: &Path, _mode: u32) -> Result<()> {
    Ok(())
}
/// Creates a symbolic link at `target` that points to `link_target`.
///
/// # Errors
///
/// Fails when the link cannot be created.
#[cfg(unix)]
fn create_symlink(link_target: &str, target: &Path) -> Result<()> {
    use std::os::unix::fs::symlink;

    symlink(link_target, target)
        .with_context(|| format!("creating symlink {}", target.display()))
}
#[cfg(unix)]
fn create_member_symlink(member: &TocMember, target: &Path) -> Result<()> {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
if let Some(raw) = &member.link_target_bytes {
std::os::unix::fs::symlink(OsStr::from_bytes(raw), target)
.with_context(|| format!("creating symlink {}", target.display()))?;
return Ok(());
}
let link_target = member
.link_target
.as_deref()
.ok_or_else(|| anyhow!("symlink {} has no link_target", member.path))?;
create_symlink(link_target, target)
}
/// Non-Unix stand-in: symlinks cannot be created here, so this always
/// returns an error naming `target`.
#[cfg(not(unix))]
fn create_symlink(_link_target: &str, target: &Path) -> Result<()> {
    Err(anyhow!(
        "symlinks not supported on this platform ({})",
        target.display()
    ))
}
/// Non-Unix variant: validates that `member` carries a textual link target
/// and hands it to `create_symlink`.
///
/// # Errors
///
/// Fails when the member has no link target, and otherwise returns whatever
/// `create_symlink` returns.
#[cfg(not(unix))]
fn create_member_symlink(member: &TocMember, target: &Path) -> Result<()> {
    match member.link_target.as_deref() {
        Some(link_target) => create_symlink(link_target, target),
        None => Err(anyhow!("symlink {} has no link_target", member.path)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A leading `/` must be refused outright.
    #[test]
    fn normalize_rejects_absolute_path() {
        let err = normalize_member_path("/etc/passwd", 0).unwrap_err();
        assert!(err.to_string().contains("absolute"), "{err}");
    }

    /// `..` is refused wherever it appears in the path.
    #[test]
    fn normalize_rejects_dotdot_components() {
        for path in ["../escaped.txt", "foo/../../bar"] {
            let err = normalize_member_path(path, 0).unwrap_err();
            assert!(err.to_string().contains(".."), "{err}");
        }
    }

    /// `.` and empty components vanish from the result.
    #[test]
    fn normalize_strips_dot_and_empty_components() {
        let p = normalize_member_path("./foo/./bar", 0).unwrap().unwrap();
        assert_eq!(p, PathBuf::from("foo/bar"));
    }

    /// Stripping removes leading components after `.` has been dropped.
    #[test]
    fn normalize_applies_strip_components() {
        for (strip, expected) in [(1, "b/c.txt"), (2, "c.txt")] {
            let p = normalize_member_path("./a/b/c.txt", strip)
                .unwrap()
                .unwrap();
            assert_eq!(p, PathBuf::from(expected));
        }
    }

    /// Stripping as many components as exist, or more, yields `None`.
    #[test]
    fn normalize_skips_when_strip_consumes_all() {
        for (path, strip) in [("./a", 1), ("./a/b", 2), ("./a/b", 5)] {
            assert!(normalize_member_path(path, strip).unwrap().is_none());
        }
    }

    /// A `*.csv` exclude matches a nested `.csv` path but not a `.bin` one.
    #[test]
    fn excludes_match_glob() {
        let raw = vec!["*.csv".to_owned()];
        let compiled = compile_patterns(&raw).unwrap();
        assert!(member_excluded("data/numbers.csv", &compiled));
        assert!(!member_excluded("data/blob.bin", &compiled));
    }
}