use pf_core::cas::BlobStore;
use pf_core::digest::Digest256;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// One filesystem object (file, directory or symlink) recorded in an [`FsTree`].
///
/// The field order and serde attributes below are part of the serialized form
/// that gets hashed into the tree digest, so they must not be reordered casually.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FsTreeEntry {
/// Path relative to the captured root; `WalkFsCapture::capture` writes it
/// with `/` separators.
pub path: String,
/// Permission bits as an octal string. Capture emits four digits (for
/// example `0755`); restore parses the string as octal and keeps only the
/// low twelve bits.
pub mode: String,
/// Content length in bytes for files, `0` for directories, and the byte
/// length of the target string for symlinks.
pub size: u64,
/// What kind of filesystem object this entry describes.
pub kind: FsEntryKind,
/// Digest of the stored content: the file bytes for files, the target string
/// for symlinks. Capture leaves it `None` for directories, in which case the
/// field is omitted from the serialized form.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub blob: Option<Digest256>,
/// Symlink target as recorded by capture (lossily converted to UTF-8).
/// Only set for symlink entries; omitted from the serialized form otherwise.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub link_target: Option<String>,
}
/// Kind of filesystem object an [`FsTreeEntry`] describes.
///
/// Serialized in snake_case (`file`, `dir`, `symlink`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FsEntryKind {
/// Regular file; its bytes are stored as a blob.
File,
/// Directory; carries no blob.
Dir,
/// Symbolic link; the target string is kept in `link_target`.
Symlink,
}
/// Serialized manifest of a captured directory tree.
///
/// `WalkFsCapture::capture` stores this as JSON in the blob store and returns
/// the digest of that JSON; `restore_tree` reads it back.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FsTree {
/// Format tag. Capture writes `fs.tree.v1`, and `restore_tree` rejects any
/// other value.
pub kind: String,
/// Entries of the tree. Capture sorts the walked entries by path before
/// building this list.
pub entries: Vec<FsTreeEntry>,
}
/// Builder-style configuration for capturing a directory tree into a blob store.
pub struct WalkFsCapture {
// Directory whose contents are captured.
root: PathBuf,
// When true (and compiled for macOS), capture tries to walk a `cp -c` clone
// of `root` instead of `root` itself.
use_apfs_clone: bool,
// Forwarded to `walkdir::WalkDir::follow_links`.
follow_symlinks: bool,
// `/`-separated path fragments; entries matching any of them are skipped.
ignore: Vec<String>,
}
impl WalkFsCapture {
    /// Creates a capture of `root` with the default settings: no APFS clone,
    /// symlinks are not followed, and `.git/objects`, `target`, `node_modules`
    /// and `.pfcid` are ignored.
    pub fn new(root: impl AsRef<Path>) -> Self {
        Self {
            root: root.as_ref().to_path_buf(),
            use_apfs_clone: false,
            follow_symlinks: false,
            ignore: vec![
                ".git/objects".into(),
                "target".into(),
                "node_modules".into(),
                ".pfcid".into(),
            ],
        }
    }

    /// Enables or disables walking an APFS clone of the root instead of the
    /// root itself. Only has an effect when compiled for macOS.
    #[must_use]
    pub fn use_apfs_clone(mut self, enable: bool) -> Self {
        self.use_apfs_clone = enable;
        self
    }

    /// Controls whether the directory walk follows symbolic links.
    #[must_use]
    pub fn follow_symlinks(mut self, enable: bool) -> Self {
        self.follow_symlinks = enable;
        self
    }

    /// Adds an ignore fragment. A fragment is a `/`-separated list of path
    /// segments; an entry is skipped when those segments appear consecutively
    /// in its path relative to the captured root.
    #[must_use]
    pub fn ignore(mut self, fragment: impl Into<String>) -> Self {
        self.ignore.push(fragment.into());
        self
    }

    /// Walks the configured root, stores every file and symlink target in
    /// `blobs`, and returns the digest of the JSON-encoded [`FsTree`] manifest.
    ///
    /// When the APFS clone option is enabled on macOS, the walk runs over a
    /// temporary clone, which is removed again before returning. If cloning
    /// fails the walk silently falls back to the live root.
    ///
    /// # Errors
    ///
    /// Returns an error when metadata, file contents or a symlink target cannot
    /// be read, when the blob store rejects a write, or when the manifest
    /// cannot be serialized.
    pub fn capture(&self, blobs: &Arc<dyn BlobStore>) -> pf_core::Result<Digest256> {
        let clone_dir: Option<PathBuf> = if self.use_apfs_clone && cfg!(target_os = "macos") {
            // Best effort: a failed clone just means we read the live tree.
            apfs_clone(&self.root).ok()
        } else {
            None
        };
        let walk_root: &Path = clone_dir.as_deref().unwrap_or(self.root.as_path());
        let outcome = self.capture_from(walk_root, blobs);
        // The clone is a private temporary copy; remove it whether or not the
        // capture succeeded so repeated captures do not fill the temp dir.
        if let Some(dir) = clone_dir.as_deref() {
            let _ = std::fs::remove_dir_all(dir);
        }
        outcome
    }

    /// Captures the tree found under `walk_root` (the real root or its clone).
    fn capture_from(
        &self,
        walk_root: &Path,
        blobs: &Arc<dyn BlobStore>,
    ) -> pf_core::Result<Digest256> {
        let mut raw: Vec<walkdir::DirEntry> = walkdir::WalkDir::new(walk_root)
            .follow_links(self.follow_symlinks)
            .into_iter()
            .filter_entry(|e| {
                // Match ignore fragments against the path *relative to the
                // root*. Matching the absolute path would prune the whole walk
                // whenever the root itself lives under e.g. `target/`.
                let rel = e.path().strip_prefix(walk_root).unwrap_or_else(|_| e.path());
                !path_matches_any_ignore(rel, &self.ignore)
            })
            // Entries the walker could not read are skipped rather than
            // failing the whole capture.
            .filter_map(std::result::Result::ok)
            .collect();
        // The root directory itself is not an entry of the tree.
        raw.retain(|e| e.path() != walk_root);
        // Sort so the manifest does not depend on directory iteration order.
        raw.sort_by(|a, b| a.path().cmp(b.path()));

        let entries: Vec<FsTreeEntry> = raw
            .par_iter()
            .map(|de| -> pf_core::Result<FsTreeEntry> {
                let abs = de.path();
                let rel = abs.strip_prefix(walk_root).unwrap_or(abs);
                let rel_str = rel.to_string_lossy().replace('\\', "/");
                let meta = de
                    .metadata()
                    .map_err(|e| std::io::Error::other(e.to_string()))?;
                let mode = unix_mode_string(&meta);

                if meta.file_type().is_dir() {
                    return Ok(FsTreeEntry {
                        path: rel_str,
                        mode,
                        size: 0,
                        kind: FsEntryKind::Dir,
                        blob: None,
                        link_target: None,
                    });
                }

                if meta.file_type().is_symlink() {
                    let target = std::fs::read_link(abs)?;
                    let target_str = target.to_string_lossy().to_string();
                    let blob = blobs.put(target_str.as_bytes())?;
                    return Ok(FsTreeEntry {
                        path: rel_str,
                        mode,
                        size: target_str.len() as u64,
                        kind: FsEntryKind::Symlink,
                        blob: Some(blob),
                        link_target: Some(target_str),
                    });
                }

                let bytes = std::fs::read(abs)?;
                let size = bytes.len() as u64;
                let digest = blobs.put(&bytes)?;
                Ok(FsTreeEntry {
                    path: rel_str,
                    mode,
                    size,
                    kind: FsEntryKind::File,
                    blob: Some(digest),
                    link_target: None,
                })
            })
            .collect::<pf_core::Result<Vec<_>>>()?;

        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries,
        };
        let json = serde_json::to_vec(&tree)?;
        blobs.put(&json)
    }
}
pub fn restore_tree(
blobs: &Arc<dyn BlobStore>,
tree_digest: &Digest256,
dst: impl AsRef<Path>,
) -> pf_core::Result<()> {
let dst = dst.as_ref();
if dst.exists() {
return Err(pf_core::Error::Io(std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
format!(
"restore_tree refuses to overwrite existing path {}",
dst.display()
),
)));
}
let tree_bytes = blobs.get(tree_digest)?;
let tree: FsTree = serde_json::from_slice(&tree_bytes)?;
if tree.kind != "fs.tree.v1" {
return Err(pf_core::Error::Integrity(format!(
"expected fs.tree.v1, got {}",
tree.kind
)));
}
let parent = dst.parent().unwrap_or_else(|| Path::new("."));
std::fs::create_dir_all(parent)?;
let staging = parent.join(format!(
".pf-restore.{}.{}",
std::process::id(),
chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
));
std::fs::create_dir(&staging)?;
for e in tree
.entries
.iter()
.filter(|e| matches!(e.kind, FsEntryKind::Dir))
{
let safe = safe_join(&staging, &e.path)?;
std::fs::create_dir_all(&safe)?;
apply_mode(&safe, &e.mode)?;
}
for e in &tree.entries {
let p = safe_join(&staging, &e.path)?;
match e.kind {
FsEntryKind::Dir => {}
FsEntryKind::File => {
let blob = e.blob.as_ref().ok_or_else(|| {
pf_core::Error::Integrity(format!("file entry {} missing blob", e.path))
})?;
let bytes = blobs.get(blob)?;
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent)?;
}
std::fs::write(&p, bytes)?;
apply_mode(&p, &e.mode)?;
}
FsEntryKind::Symlink => {
let raw_target = e.link_target.as_ref().ok_or_else(|| {
pf_core::Error::Integrity(format!(
"symlink entry {} missing link_target",
e.path
))
})?;
check_symlink_target(&staging, &p, raw_target)?;
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent)?;
}
#[cfg(unix)]
std::os::unix::fs::symlink(raw_target, &p)?;
#[cfg(not(unix))]
std::fs::write(&p, raw_target.as_bytes())?;
}
}
}
std::fs::rename(&staging, dst)?;
Ok(())
}
/// Returns `true` when any ignore fragment matches `path`.
///
/// A fragment is split on `/` into segments (empty segments are dropped) and
/// matches when those segments occur as a run of *consecutive, whole*
/// components of `path`. Root, prefix, `.` and `..` components are not
/// considered. Fragments with no segments never match.
fn path_matches_any_ignore(path: &Path, ignores: &[String]) -> bool {
    // Keep every normal component, including non-UTF-8 ones: dropping them
    // would make their neighbours look adjacent and produce false matches.
    let comps: Vec<&std::ffi::OsStr> = path
        .components()
        .filter_map(|c| match c {
            std::path::Component::Normal(s) => Some(s),
            _ => None,
        })
        .collect();

    ignores.iter().any(|ign| {
        let needles: Vec<&str> = ign.split('/').filter(|s| !s.is_empty()).collect();
        // The emptiness check must come first: `windows(0)` panics.
        !needles.is_empty()
            && comps.windows(needles.len()).any(|window| {
                window
                    .iter()
                    .zip(&needles)
                    // A non-UTF-8 component yields `None` and so never matches.
                    .all(|(comp, needle)| comp.to_str() == Some(*needle))
            })
    })
}
fn safe_join(root: &Path, relative: &str) -> pf_core::Result<PathBuf> {
let candidate = Path::new(relative);
if candidate.is_absolute() {
return Err(pf_core::Error::Integrity(format!(
"fs.tree entry has absolute path {relative:?} — refusing"
)));
}
for comp in candidate.components() {
match comp {
std::path::Component::ParentDir => {
return Err(pf_core::Error::Integrity(format!(
"fs.tree entry path {relative:?} contains `..` — refusing"
)));
}
std::path::Component::RootDir | std::path::Component::Prefix(_) => {
return Err(pf_core::Error::Integrity(format!(
"fs.tree entry path {relative:?} has root/prefix — refusing"
)));
}
std::path::Component::CurDir | std::path::Component::Normal(_) => {}
}
}
Ok(root.join(candidate))
}
/// Lexically verifies that a relative symlink `target`, resolved from the
/// directory containing `link_path`, never climbs above `root`.
///
/// The check is purely textual: it counts directory levels and does not touch
/// the filesystem or resolve other symlinks.
///
/// # Errors
///
/// Returns an integrity error when `target` is absolute, has a root or prefix
/// component, or at any point steps above `root`.
fn check_symlink_target(root: &Path, link_path: &Path, target: &str) -> pf_core::Result<()> {
    use std::path::Component;

    let wanted = Path::new(target);
    if wanted.is_absolute() {
        return Err(pf_core::Error::Integrity(format!(
            "symlink target {target:?} is absolute — refusing"
        )));
    }

    // Number of directories between `root` and the link itself; a link that is
    // not under `root` is treated as sitting directly in it.
    let starting_depth = match link_path.strip_prefix(root) {
        Ok(rel) => rel.components().count().saturating_sub(1),
        Err(_) => 0,
    };
    let mut level = isize::try_from(starting_depth).unwrap_or(isize::MAX);

    for part in wanted.components() {
        level += match part {
            Component::ParentDir => -1,
            Component::Normal(_) => 1,
            Component::CurDir => 0,
            Component::RootDir | Component::Prefix(_) => {
                return Err(pf_core::Error::Integrity(format!(
                    "symlink target {target:?} has root/prefix — refusing"
                )));
            }
        };
        // Checked after every step: dipping outside and coming back in is
        // still an escape.
        if level < 0 {
            return Err(pf_core::Error::Integrity(format!(
                "symlink target {target:?} escapes restore root — refusing"
            )));
        }
    }
    Ok(())
}
/// Sets the permission bits of `path` from the octal string `mode`.
///
/// Only the low twelve bits of the parsed value are used; a string that is not
/// valid octal falls back to `0o644`. Symlinks are left untouched.
///
/// # Errors
///
/// Returns an error when `path` cannot be inspected or its permissions cannot
/// be changed.
#[cfg(unix)]
fn apply_mode(path: &Path, mode: &str) -> pf_core::Result<()> {
    use std::os::unix::fs::PermissionsExt as _;

    // `set_permissions` follows symlinks, so bail out before touching one.
    if std::fs::symlink_metadata(path)?.file_type().is_symlink() {
        return Ok(());
    }

    let parsed = match u32::from_str_radix(mode, 8) {
        Ok(value) => value,
        Err(_) => 0o644,
    };
    std::fs::set_permissions(path, std::fs::Permissions::from_mode(parsed & 0o7777))?;
    Ok(())
}
/// Non-Unix stand-in for `apply_mode`: the recorded mode is ignored and the
/// call always succeeds without touching `_path`.
#[cfg(not(unix))]
fn apply_mode(_path: &Path, _mode: &str) -> pf_core::Result<()> {
Ok(())
}
/// Clones the directory `src` into a fresh path under the system temp
/// directory by running `cp -c -R`, and returns the clone's path.
///
/// The caller owns the returned directory and is responsible for removing it.
///
/// # Errors
///
/// Returns an error when `cp` cannot be started or exits unsuccessfully. In
/// both cases any partially written clone is removed (best effort), so a
/// failed attempt does not leave data behind.
#[cfg(target_os = "macos")]
fn apfs_clone(src: &Path) -> std::io::Result<PathBuf> {
    use std::process::Command;

    // PID plus a nanosecond timestamp keeps concurrent captures apart.
    let dst = std::env::temp_dir().join(format!(
        "pf-apfs-clone.{}.{}",
        std::process::id(),
        chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
    ));

    let outcome = Command::new("cp")
        .args(["-c", "-R"])
        .arg(src)
        .arg(&dst)
        .status()
        .and_then(|status| {
            if status.success() {
                Ok(())
            } else {
                Err(std::io::Error::other(format!(
                    "cp -c -R exit status: {status:?}"
                )))
            }
        });

    match outcome {
        Ok(()) => Ok(dst),
        Err(err) => {
            // `cp` may have copied part of the tree before failing.
            let _ = std::fs::remove_dir_all(&dst);
            Err(err)
        }
    }
}
/// Stand-in for `apfs_clone` on platforms other than macOS: it never clones
/// anything and always returns an error.
#[cfg(not(target_os = "macos"))]
fn apfs_clone(_src: &Path) -> std::io::Result<PathBuf> {
Err(std::io::Error::other("APFS clone only available on macOS"))
}
/// Renders the permission bits of `meta` (the low twelve mode bits) as a
/// zero-padded octal string of at least four digits, e.g. `0755`.
#[cfg(unix)]
fn unix_mode_string(meta: &std::fs::Metadata) -> String {
    use std::os::unix::fs::PermissionsExt as _;

    // Mask off the file-type bits; only permissions are recorded.
    let permission_bits = meta.permissions().mode() & 0o7777;
    format!("{permission_bits:04o}")
}
/// Approximates a Unix mode string on platforms without permission bits:
/// `0444` when `meta` is marked read-only, `0644` otherwise.
#[cfg(not(unix))]
fn unix_mode_string(meta: &std::fs::Metadata) -> String {
    let read_only = meta.permissions().readonly();
    let text = if read_only { "0444" } else { "0644" };
    String::from(text)
}
// Round-trip and hardening tests for capture and restore, run against an
// in-memory blob store and temporary directories.
#[cfg(test)]
mod tests {
use super::*;
use pf_core::cas::MemBlobStore;
use std::sync::Arc;
use tempfile::TempDir;
// Writes `contents` to `dir/rel`, creating parent directories as needed.
fn write(dir: &Path, rel: &str, contents: &[u8]) {
let p = dir.join(rel);
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(&p, contents).unwrap();
}
// Capturing a small tree and restoring it reproduces the file contents.
#[test]
fn round_trip_small_tree() {
let src = TempDir::new().unwrap();
write(src.path(), "a.txt", b"hello");
write(src.path(), "sub/b.txt", b"world");
write(src.path(), "sub/c.bin", &vec![0xABu8; 8 * 1024]);
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let tree_cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
let restore_root = TempDir::new().unwrap();
let dst = restore_root.path().join("restored");
restore_tree(&blobs, &tree_cid, &dst).unwrap();
assert_eq!(std::fs::read(dst.join("a.txt")).unwrap(), b"hello");
assert_eq!(std::fs::read(dst.join("sub/b.txt")).unwrap(), b"world");
assert_eq!(
std::fs::read(dst.join("sub/c.bin")).unwrap().len(),
8 * 1024
);
}
// Capturing the same unchanged tree twice yields the same tree digest.
#[test]
fn capture_is_deterministic() {
let src = TempDir::new().unwrap();
write(src.path(), "a.txt", b"hello");
write(src.path(), "b.txt", b"world");
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let cid1 = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
let cid2 = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
assert_eq!(
cid1, cid2,
"capture of identical tree must be byte-identical"
);
}
// The default ignore list keeps `node_modules` out of the manifest.
#[test]
fn ignored_paths_are_skipped() {
let src = TempDir::new().unwrap();
write(src.path(), "kept.txt", b"keep");
write(src.path(), "node_modules/dep/index.js", b"skip");
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
let bytes = blobs.get(&cid).unwrap();
let tree: FsTree = serde_json::from_slice(&bytes).unwrap();
assert!(tree.entries.iter().any(|e| e.path == "kept.txt"));
assert!(
!tree
.entries
.iter()
.any(|e| e.path.starts_with("node_modules"))
);
}
// A symlink in the source is restored as a symlink with the same target.
#[cfg(unix)]
#[test]
fn symlinks_are_captured_as_symlinks() {
let src = TempDir::new().unwrap();
write(src.path(), "real.txt", b"data");
std::os::unix::fs::symlink("real.txt", src.path().join("link.txt")).unwrap();
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
let restore_root = TempDir::new().unwrap();
let dst = restore_root.path().join("r");
restore_tree(&blobs, &cid, &dst).unwrap();
let meta = std::fs::symlink_metadata(dst.join("link.txt")).unwrap();
assert!(meta.file_type().is_symlink());
assert_eq!(
std::fs::read_link(dst.join("link.txt"))
.unwrap()
.to_str()
.unwrap(),
"real.txt"
);
}
// A hand-built manifest whose entry path contains `..` is rejected and
// nothing is written outside the destination.
#[test]
fn malicious_relative_path_traversal_is_refused() {
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let payload = b"PWNED";
let blob = blobs.put(payload).unwrap();
let tree = FsTree {
kind: "fs.tree.v1".into(),
entries: vec![FsTreeEntry {
path: "../../escape.txt".into(),
mode: "100644".into(),
size: payload.len() as u64,
kind: FsEntryKind::File,
blob: Some(blob),
link_target: None,
}],
};
let tree_bytes = serde_json::to_vec(&tree).unwrap();
let tree_cid = blobs.put(&tree_bytes).unwrap();
let restore_root = TempDir::new().unwrap();
let dst = restore_root.path().join("dst");
let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
assert!(
format!("{err}").contains("`..`") || format!("{err}").contains("refusing"),
"expected path-traversal refusal, got {err}"
);
assert!(!restore_root.path().join("escape.txt").exists());
}
// A hand-built manifest with an absolute entry path is rejected.
#[test]
fn malicious_absolute_path_is_refused() {
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let blob = blobs.put(b"x").unwrap();
let tree = FsTree {
kind: "fs.tree.v1".into(),
entries: vec![FsTreeEntry {
path: "/tmp/should-not-write".into(),
mode: "100644".into(),
size: 1,
kind: FsEntryKind::File,
blob: Some(blob),
link_target: None,
}],
};
let tree_cid = blobs.put(&serde_json::to_vec(&tree).unwrap()).unwrap();
let restore_root = TempDir::new().unwrap();
let dst = restore_root.path().join("dst");
let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
assert!(
format!("{err}").contains("absolute") || format!("{err}").contains("refusing"),
"expected absolute-path refusal, got {err}"
);
}
// A symlink entry whose target climbs above the restore root is rejected.
#[cfg(unix)]
#[test]
fn malicious_symlink_escape_is_refused() {
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let target_str = "../../escape";
let blob = blobs.put(target_str.as_bytes()).unwrap();
let tree = FsTree {
kind: "fs.tree.v1".into(),
entries: vec![FsTreeEntry {
path: "evil.lnk".into(),
mode: "120777".into(),
size: target_str.len() as u64,
kind: FsEntryKind::Symlink,
blob: Some(blob),
link_target: Some(target_str.to_owned()),
}],
};
let tree_cid = blobs.put(&serde_json::to_vec(&tree).unwrap()).unwrap();
let restore_root = TempDir::new().unwrap();
let dst = restore_root.path().join("dst");
let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
assert!(
format!("{err}").contains("escape") || format!("{err}").contains("refusing"),
"expected symlink-escape refusal, got {err}"
);
}
// A file made executable (0o755) before capture has the same mode after restore.
#[cfg(unix)]
#[test]
fn executable_mode_is_restored() {
use std::os::unix::fs::PermissionsExt as _;
let src = TempDir::new().unwrap();
write(src.path(), "script.sh", b"#!/bin/sh\necho hi\n");
let scr = src.path().join("script.sh");
std::fs::set_permissions(&scr, std::fs::Permissions::from_mode(0o755)).unwrap();
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
let restore_root = TempDir::new().unwrap();
let dst = restore_root.path().join("r");
restore_tree(&blobs, &cid, &dst).unwrap();
let meta = std::fs::metadata(dst.join("script.sh")).unwrap();
assert_eq!(
meta.permissions().mode() & 0o7777,
0o755,
"executable bit must survive snapshot+restore"
);
}
// The `target` ignore fragment matches a whole path segment: `src/targeted`
// is kept while the `target/` subtree is skipped.
#[test]
fn ignore_matches_segments_not_substrings() {
let src = TempDir::new().unwrap();
write(src.path(), "src/targeted/keep.txt", b"keep");
write(src.path(), "target/should-skip.txt", b"skip");
let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
let tree: FsTree = serde_json::from_slice(&blobs.get(&cid).unwrap()).unwrap();
let paths: Vec<&str> = tree.entries.iter().map(|e| e.path.as_str()).collect();
assert!(
paths.contains(&"src/targeted/keep.txt"),
"src/targeted/keep.txt must NOT be filtered (was: {paths:?})"
);
assert!(
!paths.iter().any(|p| p.starts_with("target/")),
"target/ subtree must be filtered (was: {paths:?})"
);
}
}