use pf_core::cas::BlobStore;
use pf_core::digest::Digest256;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// One entry (file, directory, or symlink) of a captured [`FsTree`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FsTreeEntry {
/// Path relative to the capture root, as written by `WalkFsCapture::capture`.
pub path: String,
/// Permission bits as an octal string (capture writes four digits, e.g. `"0644"`).
/// Restore parses it as base 8 and keeps only the low 12 bits.
pub mode: String,
/// Byte length of the file contents, or of the symlink target string; capture
/// writes 0 for directories.
pub size: u64,
/// Whether the entry is a file, a directory, or a symlink.
pub kind: FsEntryKind,
/// Digest of the stored blob: the file contents, or the symlink target bytes.
/// Capture leaves it `None` for directories; omitted from the serialized form
/// when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub blob: Option<Digest256>,
/// Symlink target as read at capture time; capture sets it only for symlink
/// entries. Omitted from the serialized form when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub link_target: Option<String>,
}
/// The kind of filesystem object an [`FsTreeEntry`] describes.
///
/// Serialized in `snake_case` (`"file"`, `"dir"`, `"symlink"`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FsEntryKind {
/// A file whose contents are stored as a blob.
File,
/// A directory; it carries no blob.
Dir,
/// A symbolic link, recorded by its target rather than by what it points at.
Symlink,
}
/// The serialized manifest of a captured directory tree.
///
/// Capture stores this as JSON in the blob store; the digest of that JSON is
/// what identifies the snapshot.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FsTree {
/// Format tag. Capture writes `"fs.tree.v1"` and restore rejects any other value.
pub kind: String,
/// Every captured entry; capture emits them sorted by path.
pub entries: Vec<FsTreeEntry>,
}
/// Builder-style configuration for capturing a directory tree into a blob store.
pub struct WalkFsCapture {
/// Directory whose contents are captured.
root: PathBuf,
/// When set (and compiled for macOS), `capture` walks a clone of `root`
/// produced by `apfs_clone`, falling back to `root` itself if cloning fails.
use_apfs_clone: bool,
/// Passed to walkdir's `follow_links`.
follow_symlinks: bool,
/// Every ignore fragment as given, literal and glob alike; matched against
/// path components by `path_matches_any_ignore`.
ignore: Vec<String>,
/// Compiled matchers for the fragments that contain `*`, `?` or `[`.
ignore_globs: Vec<globset::GlobMatcher>,
}
/// Ignore fragments that `WalkFsCapture::new` adds on top of the base list and
/// that `WalkFsCapture::new_without_default_ignores` leaves out.
///
/// Entries containing glob characters (`*.pyc`, `*.pyo`) are also compiled into
/// glob matchers; the rest are matched as whole path components.
const DEFAULT_EXTRA_IGNORES: &[&str] = &[
"__pycache__",
".pytest_cache",
".mypy_cache",
".ruff_cache",
".tox",
".coverage",
".venv",
".DS_Store",
"*.pyc",
"*.pyo",
];
impl WalkFsCapture {
    /// Fragments every capture ignores, whichever constructor is used.
    const BASE_IGNORES: &'static [&'static str] =
        &[".git/objects", "target", "node_modules", ".pfcid"];

    /// Shared constructor body: compiles the glob-shaped fragments and fills in
    /// the default flags (no APFS clone, symlinks not followed).
    fn with_ignores(root: &Path, ignore: Vec<String>) -> Self {
        let ignore_globs = compile_globs(&ignore);
        Self {
            root: root.to_path_buf(),
            use_apfs_clone: false,
            follow_symlinks: false,
            ignore,
            ignore_globs,
        }
    }

    /// Creates a capture of `root` that ignores the base fragments plus
    /// [`DEFAULT_EXTRA_IGNORES`].
    pub fn new(root: impl AsRef<Path>) -> Self {
        let ignore = Self::BASE_IGNORES
            .iter()
            .chain(DEFAULT_EXTRA_IGNORES.iter())
            .map(|fragment| (*fragment).to_owned())
            .collect();
        Self::with_ignores(root.as_ref(), ignore)
    }

    /// Creates a capture of `root` that ignores only the base fragments
    /// (`.git/objects`, `target`, `node_modules`, `.pfcid`).
    pub fn new_without_default_ignores(root: impl AsRef<Path>) -> Self {
        let ignore = Self::BASE_IGNORES
            .iter()
            .map(|fragment| (*fragment).to_owned())
            .collect();
        Self::with_ignores(root.as_ref(), ignore)
    }

    /// Enables or disables walking an APFS clone of the root instead of the
    /// root itself. Only has an effect when compiled for macOS.
    #[must_use]
    pub fn use_apfs_clone(mut self, enable: bool) -> Self {
        self.use_apfs_clone = enable;
        self
    }

    /// Sets whether the directory walk follows symbolic links.
    #[must_use]
    pub fn follow_symlinks(mut self, enable: bool) -> Self {
        self.follow_symlinks = enable;
        self
    }

    /// Adds one ignore fragment.
    ///
    /// Every fragment is matched against whole path components. A fragment that
    /// contains `*`, `?` or `[` is additionally compiled as a glob; if it does
    /// not compile, a warning is logged (the same way the constructors report
    /// it) and only the component match remains.
    #[must_use]
    pub fn ignore(mut self, fragment: impl Into<String>) -> Self {
        let entry: String = fragment.into();
        if has_glob_chars(&entry) {
            match globset::Glob::new(&entry) {
                Ok(glob) => self.ignore_globs.push(glob.compile_matcher()),
                Err(e) => tracing::warn!("ignore: invalid glob {entry:?}: {e}"),
            }
        }
        self.ignore.push(entry);
        self
    }

    /// Adds one ignore fragment per line of the file at `path`.
    ///
    /// A missing file is not an error. Blank lines and `#` comments are
    /// skipped; `!` negations are skipped with a warning; leading and trailing
    /// `/` are stripped from each remaining line.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the file exists but cannot be read.
    pub fn ignore_from(mut self, path: impl AsRef<Path>) -> std::io::Result<Self> {
        let path = path.as_ref();
        if !path.exists() {
            return Ok(self);
        }
        let content = std::fs::read_to_string(path)?;
        for raw in content.lines() {
            let line = raw.trim();
            if line.is_empty() || line.starts_with('#') {
                continue;
            }
            if line.starts_with('!') {
                tracing::warn!(
                    "ignoring gitignore negation in {}: {} (negation not yet supported in v1.0.13)",
                    path.display(),
                    line
                );
                continue;
            }
            let trimmed = line.trim_start_matches('/').trim_end_matches('/');
            if trimmed.is_empty() {
                continue;
            }
            self = self.ignore(trimmed);
        }
        Ok(self)
    }

    /// Captures the tree into `blobs` and returns the digest of its manifest.
    ///
    /// Each file's contents and each symlink's target are stored as blobs. The
    /// sorted list of entries is then serialized as an `fs.tree.v1` JSON
    /// manifest and stored too.
    ///
    /// When APFS cloning is enabled and succeeds, the clone is walked instead
    /// of the root and is removed again before returning.
    ///
    /// # Errors
    ///
    /// Fails if an entry's metadata, contents or link target cannot be read,
    /// if the blob store rejects a write, or if the manifest cannot be
    /// serialized. Entries the directory walk itself cannot read are skipped
    /// with a warning.
    pub fn capture(&self, blobs: &Arc<dyn BlobStore>) -> pf_core::Result<Digest256> {
        let clone_root: Option<PathBuf> = if self.use_apfs_clone && cfg!(target_os = "macos") {
            match apfs_clone(&self.root) {
                Ok(clone) => Some(clone),
                Err(e) => {
                    tracing::warn!(
                        "capture: APFS clone failed, walking {} directly: {e}",
                        self.root.display()
                    );
                    None
                }
            }
        } else {
            None
        };
        let walk_root = clone_root.as_deref().unwrap_or(self.root.as_path());
        let outcome = self.capture_from(walk_root, blobs);
        // The clone is a throwaway copy in the temp dir; drop it whether or not
        // the capture succeeded so repeated captures do not fill the disk.
        if let Some(clone) = &clone_root {
            if let Err(e) = std::fs::remove_dir_all(clone) {
                tracing::warn!(
                    "capture: failed to remove APFS clone {}: {e}",
                    clone.display()
                );
            }
        }
        outcome
    }

    /// Walks `walk_root`, stores every entry's payload, and stores the manifest.
    fn capture_from(
        &self,
        walk_root: &Path,
        blobs: &Arc<dyn BlobStore>,
    ) -> pf_core::Result<Digest256> {
        let mut raw: Vec<walkdir::DirEntry> = walkdir::WalkDir::new(walk_root)
            .follow_links(self.follow_symlinks)
            .into_iter()
            .filter_entry(|e| {
                // Match ignores against the path *below* the root only. Using
                // the absolute path would let an ancestor of the root (say a
                // checkout under `/work/target/`) exclude the entire tree.
                let rel = e.path().strip_prefix(walk_root).unwrap_or(e.path());
                !path_matches_any_ignore(rel, &self.ignore)
                    && !path_matches_any_glob(rel, &self.ignore_globs)
            })
            .filter_map(|walked| match walked {
                Ok(entry) => Some(entry),
                Err(e) => {
                    tracing::warn!("capture: skipping unreadable entry: {e}");
                    None
                }
            })
            .collect();
        raw.retain(|e| {
            // The root is the container, not an entry.
            if e.path() == walk_root {
                return false;
            }
            // FIFOs, sockets and devices cannot be snapshotted; reading one
            // can block forever or fail and abort the whole capture.
            let file_type = e.file_type();
            let supported = file_type.is_dir() || file_type.is_file() || file_type.is_symlink();
            if !supported {
                tracing::warn!("capture: skipping special file {}", e.path().display());
            }
            supported
        });
        // A stable order makes the manifest, and so its digest, deterministic.
        raw.sort_by(|a, b| a.path().cmp(b.path()));
        let entries: Vec<FsTreeEntry> = raw
            .par_iter()
            .map(|de| Self::capture_entry(de, walk_root, blobs))
            .collect::<pf_core::Result<Vec<_>>>()?;
        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries,
        };
        let json = serde_json::to_vec(&tree)?;
        blobs.put(&json)
    }

    /// Builds the manifest entry for one walked path, storing its payload
    /// (file contents or symlink target) in `blobs`.
    fn capture_entry(
        de: &walkdir::DirEntry,
        walk_root: &Path,
        blobs: &Arc<dyn BlobStore>,
    ) -> pf_core::Result<FsTreeEntry> {
        let abs = de.path();
        let rel = abs.strip_prefix(walk_root).unwrap_or(abs);
        // `\` is a separator only on Windows. On Unix it is an ordinary
        // filename byte, and rewriting it would turn a file named `a\b` into
        // a file `b` inside a directory `a`.
        let rel_str = if cfg!(windows) {
            rel.to_string_lossy().replace('\\', "/")
        } else {
            rel.to_string_lossy().into_owned()
        };
        let meta = de
            .metadata()
            .map_err(|e| std::io::Error::other(e.to_string()))?;
        let mode = unix_mode_string(&meta);
        let file_type = meta.file_type();
        if file_type.is_dir() {
            return Ok(FsTreeEntry {
                path: rel_str,
                mode,
                size: 0,
                kind: FsEntryKind::Dir,
                blob: None,
                link_target: None,
            });
        }
        if file_type.is_symlink() {
            let target = std::fs::read_link(abs)?;
            let target_str = target.to_string_lossy().to_string();
            let blob = blobs.put(target_str.as_bytes())?;
            return Ok(FsTreeEntry {
                path: rel_str,
                mode,
                size: target_str.len() as u64,
                kind: FsEntryKind::Symlink,
                blob: Some(blob),
                link_target: Some(target_str),
            });
        }
        let bytes = std::fs::read(abs)?;
        let size = bytes.len() as u64;
        let digest = blobs.put(&bytes)?;
        Ok(FsTreeEntry {
            path: rel_str,
            mode,
            size,
            kind: FsEntryKind::File,
            blob: Some(digest),
            link_target: None,
        })
    }
}
/// Options for [`restore_tree_with_options`].
#[derive(Debug, Clone, Copy, Default)]
pub struct RestoreOptions {
/// When `false` (the default), symlink entries whose target is an absolute
/// path are skipped with a warning on stderr. When `true`, they are recreated
/// with the recorded target.
pub allow_absolute_symlinks: bool,
}
/// Restores the tree identified by `tree_digest` into `dst` using the default
/// [`RestoreOptions`].
///
/// # Errors
///
/// Propagates whatever [`restore_tree_with_options`] returns.
pub fn restore_tree(
    blobs: &Arc<dyn BlobStore>,
    tree_digest: &Digest256,
    dst: impl AsRef<Path>,
) -> pf_core::Result<()> {
    let defaults = RestoreOptions::default();
    restore_tree_with_options(blobs, tree_digest, dst, defaults)
}
pub fn restore_tree_with_options(
blobs: &Arc<dyn BlobStore>,
tree_digest: &Digest256,
dst: impl AsRef<Path>,
opts: RestoreOptions,
) -> pf_core::Result<()> {
let dst = dst.as_ref();
if dst.exists() {
return Err(pf_core::Error::Io(std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
format!(
"restore_tree refuses to overwrite existing path {}",
dst.display()
),
)));
}
let tree_bytes = blobs.get(tree_digest)?;
let tree: FsTree = serde_json::from_slice(&tree_bytes)?;
if tree.kind != "fs.tree.v1" {
return Err(pf_core::Error::Integrity(format!(
"expected fs.tree.v1, got {}",
tree.kind
)));
}
let parent = dst.parent().unwrap_or_else(|| Path::new("."));
std::fs::create_dir_all(parent)?;
let staging = parent.join(format!(
".pf-restore.{}.{}",
std::process::id(),
chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
));
std::fs::create_dir(&staging)?;
for e in tree
.entries
.iter()
.filter(|e| matches!(e.kind, FsEntryKind::Dir))
{
let safe = safe_join(&staging, &e.path)?;
std::fs::create_dir_all(&safe)?;
apply_mode(&safe, &e.mode)?;
}
for e in &tree.entries {
let p = safe_join(&staging, &e.path)?;
match e.kind {
FsEntryKind::Dir => {}
FsEntryKind::File => {
let blob = e.blob.as_ref().ok_or_else(|| {
pf_core::Error::Integrity(format!("file entry {} missing blob", e.path))
})?;
let bytes = blobs.get(blob)?;
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent)?;
}
std::fs::write(&p, bytes)?;
apply_mode(&p, &e.mode)?;
}
FsEntryKind::Symlink => {
let raw_target = e.link_target.as_ref().ok_or_else(|| {
pf_core::Error::Integrity(format!(
"symlink entry {} missing link_target",
e.path
))
})?;
if Path::new(raw_target).is_absolute() {
if opts.allow_absolute_symlinks {
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent)?;
}
#[cfg(unix)]
std::os::unix::fs::symlink(raw_target, &p)?;
#[cfg(not(unix))]
std::fs::write(&p, raw_target.as_bytes())?;
} else {
eprintln!(
"warning: skipped absolute symlink {} -> {} \
(pass --allow-absolute-symlinks to restore)",
e.path, raw_target
);
}
continue;
}
check_symlink_target(&staging, &p, raw_target)?;
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent)?;
}
#[cfg(unix)]
std::os::unix::fs::symlink(raw_target, &p)?;
#[cfg(not(unix))]
std::fs::write(&p, raw_target.as_bytes())?;
}
}
}
std::fs::rename(&staging, dst)?;
Ok(())
}
/// Reports whether `entry` contains a glob metacharacter (`*`, `?` or `[`).
fn has_glob_chars(entry: &str) -> bool {
    entry.chars().any(|ch| matches!(ch, '*' | '?' | '['))
}
/// Compiles every glob-shaped fragment in `ignores` into a matcher.
///
/// Fragments without glob characters are passed over; fragments that fail to
/// compile are reported with a warning and left out.
fn compile_globs(ignores: &[String]) -> Vec<globset::GlobMatcher> {
    ignores
        .iter()
        .filter(|ign| has_glob_chars(ign))
        .filter_map(|ign| match globset::Glob::new(ign) {
            Ok(glob) => Some(glob.compile_matcher()),
            Err(e) => {
                tracing::warn!("ignore: invalid glob {ign:?}: {e}");
                None
            }
        })
        .collect()
}
fn path_matches_any_glob(relative_path: &Path, globs: &[globset::GlobMatcher]) -> bool {
if globs.is_empty() {
return false;
}
for g in globs {
if g.is_match(relative_path) {
return true;
}
if let Some(name) = relative_path.file_name()
&& g.is_match(Path::new(name))
{
return true;
}
}
false
}
/// Reports whether `path` contains any of the `ignores` fragments as a run of
/// consecutive, whole path components.
///
/// A fragment is split on `/`; `"target"` therefore matches a component named
/// exactly `target` (never `targeted`), and `".git/objects"` matches only when
/// `objects` directly follows `.git`. Empty fragments match nothing. Only
/// normal components of `path` take part; root, prefix, `.` and `..` are
/// skipped.
///
/// Components are compared as `OsStr`, so a component that is not valid UTF-8
/// stays in place. Dropping it would let the components on either side of it
/// look adjacent and produce a false multi-segment match.
fn path_matches_any_ignore(path: &Path, ignores: &[String]) -> bool {
    let comps: Vec<&std::ffi::OsStr> = path
        .components()
        .filter_map(|c| match c {
            std::path::Component::Normal(s) => Some(s),
            _ => None,
        })
        .collect();
    ignores.iter().any(|ign| {
        let needles: Vec<&std::ffi::OsStr> = ign
            .split('/')
            .filter(|s| !s.is_empty())
            .map(std::ffi::OsStr::new)
            .collect();
        !needles.is_empty()
            && comps
                .windows(needles.len())
                .any(|window| window == needles.as_slice())
    })
}
fn safe_join(root: &Path, relative: &str) -> pf_core::Result<PathBuf> {
let candidate = Path::new(relative);
if candidate.is_absolute() {
return Err(pf_core::Error::Integrity(format!(
"fs.tree entry has absolute path {relative:?} — refusing"
)));
}
for comp in candidate.components() {
match comp {
std::path::Component::ParentDir => {
return Err(pf_core::Error::Integrity(format!(
"fs.tree entry path {relative:?} contains `..` — refusing"
)));
}
std::path::Component::RootDir | std::path::Component::Prefix(_) => {
return Err(pf_core::Error::Integrity(format!(
"fs.tree entry path {relative:?} has root/prefix — refusing"
)));
}
std::path::Component::CurDir | std::path::Component::Normal(_) => {}
}
}
Ok(root.join(candidate))
}
/// Checks lexically that the relative symlink `target`, resolved from the
/// directory holding `link_path`, never climbs above `root`.
///
/// The walk starts at the depth of the link's parent directory below `root`.
/// Every `..` in `target` goes one level up, every named component one level
/// down, and `.` stays put. Dropping below zero at any point is an escape.
///
/// # Errors
///
/// Returns an `Integrity` error if `target` is absolute, carries a root or
/// prefix component, or escapes `root`.
fn check_symlink_target(root: &Path, link_path: &Path, target: &str) -> pf_core::Result<()> {
    use std::path::Component;

    let target_path = Path::new(target);
    if target_path.is_absolute() {
        return Err(pf_core::Error::Integrity(format!(
            "symlink target {target:?} is absolute — refusing"
        )));
    }
    // Depth of the directory that contains the link; 0 means `root` itself.
    let parent_depth = match link_path.strip_prefix(root) {
        Ok(rel) => rel.components().count().saturating_sub(1),
        Err(_) => 0,
    };
    let mut depth = isize::try_from(parent_depth).unwrap_or(isize::MAX);
    for part in target_path.components() {
        match part {
            Component::ParentDir => depth -= 1,
            Component::Normal(_) => depth += 1,
            Component::CurDir => {}
            Component::RootDir | Component::Prefix(_) => {
                return Err(pf_core::Error::Integrity(format!(
                    "symlink target {target:?} has root/prefix — refusing"
                )));
            }
        }
        if depth < 0 {
            return Err(pf_core::Error::Integrity(format!(
                "symlink target {target:?} escapes restore root — refusing"
            )));
        }
    }
    Ok(())
}
/// Sets the permission bits of `path` from the octal string `mode`.
///
/// Only the low 12 bits are applied, and a `mode` that does not parse as octal
/// falls back to `0o644`. Symlinks are left untouched.
///
/// # Errors
///
/// Fails if `path` cannot be inspected or its permissions cannot be changed.
#[cfg(unix)]
fn apply_mode(path: &Path, mode: &str) -> pf_core::Result<()> {
    use std::os::unix::fs::PermissionsExt as _;

    if std::fs::symlink_metadata(path)?.file_type().is_symlink() {
        return Ok(());
    }
    let bits = u32::from_str_radix(mode, 8).unwrap_or(0o644) & 0o7777;
    std::fs::set_permissions(path, std::fs::Permissions::from_mode(bits))?;
    Ok(())
}

/// Non-Unix targets have no mode bits to apply; this does nothing.
#[cfg(not(unix))]
fn apply_mode(_path: &Path, _mode: &str) -> pf_core::Result<()> {
    Ok(())
}
/// Clones `src` into a fresh directory under the system temp dir using
/// `cp -c -R` and returns the clone's path.
///
/// The caller owns the returned directory and is responsible for removing it.
///
/// # Errors
///
/// Fails if `cp` cannot be spawned or exits unsuccessfully. In the latter case
/// whatever `cp` had already written is removed (best effort) so a failed
/// clone does not leave a partial copy in the temp dir.
#[cfg(target_os = "macos")]
fn apfs_clone(src: &Path) -> std::io::Result<PathBuf> {
    use std::process::Command;
    let dst = std::env::temp_dir().join(format!(
        "pf-apfs-clone.{}.{}",
        std::process::id(),
        chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
    ));
    let status = Command::new("cp")
        .args(["-c", "-R"])
        .arg(src)
        .arg(&dst)
        .status()?;
    if !status.success() {
        // Nobody will ever look at a half-copied tree; the cp failure is the
        // error worth reporting, so a cleanup failure is ignored.
        let _ = std::fs::remove_dir_all(&dst);
        return Err(std::io::Error::other(format!(
            "cp -c -R exit status: {status:?}"
        )));
    }
    Ok(dst)
}

/// APFS cloning is unavailable off macOS; this always returns an error.
#[cfg(not(target_os = "macos"))]
fn apfs_clone(_src: &Path) -> std::io::Result<PathBuf> {
    Err(std::io::Error::other("APFS clone only available on macOS"))
}
/// Formats the permission bits of `meta` (the low 12 bits of its mode) as a
/// zero-padded octal string of at least four digits, e.g. `"0755"`.
#[cfg(unix)]
fn unix_mode_string(meta: &std::fs::Metadata) -> String {
    use std::os::unix::fs::PermissionsExt as _;

    let permission_bits = meta.permissions().mode() & 0o7777;
    format!("{permission_bits:04o}")
}

/// Without Unix mode bits, reports `"0444"` for read-only files and `"0644"`
/// for everything else.
#[cfg(not(unix))]
fn unix_mode_string(meta: &std::fs::Metadata) -> String {
    let label = if meta.permissions().readonly() {
        "0444"
    } else {
        "0644"
    };
    label.to_owned()
}
#[cfg(test)]
mod tests {
    use super::*;
    use pf_core::cas::MemBlobStore;
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Writes `contents` to `dir/rel`, creating parent directories as needed.
    fn write(dir: &Path, rel: &str, contents: &[u8]) {
        let p = dir.join(rel);
        if let Some(parent) = p.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(&p, contents).unwrap();
    }

    /// Capture followed by restore reproduces file contents, including nested
    /// and larger binary files.
    #[test]
    fn round_trip_small_tree() {
        let src = TempDir::new().unwrap();
        write(src.path(), "a.txt", b"hello");
        write(src.path(), "sub/b.txt", b"world");
        write(src.path(), "sub/c.bin", &vec![0xABu8; 8 * 1024]);
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let tree_cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("restored");
        restore_tree(&blobs, &tree_cid, &dst).unwrap();
        assert_eq!(std::fs::read(dst.join("a.txt")).unwrap(), b"hello");
        assert_eq!(std::fs::read(dst.join("sub/b.txt")).unwrap(), b"world");
        assert_eq!(
            std::fs::read(dst.join("sub/c.bin")).unwrap().len(),
            8 * 1024
        );
    }

    /// Capturing the same tree twice yields the same manifest digest.
    #[test]
    fn capture_is_deterministic() {
        let src = TempDir::new().unwrap();
        write(src.path(), "a.txt", b"hello");
        write(src.path(), "b.txt", b"world");
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid1 = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let cid2 = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        assert_eq!(
            cid1, cid2,
            "capture of identical tree must be byte-identical"
        );
    }

    /// A base-ignored directory (`node_modules`) never reaches the manifest.
    #[test]
    fn ignored_paths_are_skipped() {
        let src = TempDir::new().unwrap();
        write(src.path(), "kept.txt", b"keep");
        write(src.path(), "node_modules/dep/index.js", b"skip");
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let bytes = blobs.get(&cid).unwrap();
        let tree: FsTree = serde_json::from_slice(&bytes).unwrap();
        assert!(tree.entries.iter().any(|e| e.path == "kept.txt"));
        assert!(
            !tree
                .entries
                .iter()
                .any(|e| e.path.starts_with("node_modules"))
        );
    }

    /// A relative symlink survives the round trip as a symlink with the same
    /// target.
    #[cfg(unix)]
    #[test]
    fn symlinks_are_captured_as_symlinks() {
        let src = TempDir::new().unwrap();
        write(src.path(), "real.txt", b"data");
        std::os::unix::fs::symlink("real.txt", src.path().join("link.txt")).unwrap();
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("r");
        restore_tree(&blobs, &cid, &dst).unwrap();
        let meta = std::fs::symlink_metadata(dst.join("link.txt")).unwrap();
        assert!(meta.file_type().is_symlink());
        assert_eq!(
            std::fs::read_link(dst.join("link.txt"))
                .unwrap()
                .to_str()
                .unwrap(),
            "real.txt"
        );
    }

    /// A crafted manifest whose entry path climbs out with `..` is refused and
    /// writes nothing.
    #[test]
    fn malicious_relative_path_traversal_is_refused() {
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let payload = b"PWNED";
        let blob = blobs.put(payload).unwrap();
        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries: vec![FsTreeEntry {
                path: "../../escape.txt".into(),
                mode: "100644".into(),
                size: payload.len() as u64,
                kind: FsEntryKind::File,
                blob: Some(blob),
                link_target: None,
            }],
        };
        let tree_bytes = serde_json::to_vec(&tree).unwrap();
        let tree_cid = blobs.put(&tree_bytes).unwrap();
        let restore_root = TempDir::new().unwrap();
        // Restore stages beside `dst`, i.e. in `restore_root/nested/`. Placing
        // `dst` one level down means `../../escape.txt`, resolved from that
        // staging directory, lands at `restore_root/escape.txt`, which is
        // exactly the path asserted on below.
        let dst = restore_root.path().join("nested").join("dst");
        let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
        assert!(
            format!("{err}").contains("`..`") || format!("{err}").contains("refusing"),
            "expected path-traversal refusal, got {err}"
        );
        assert!(!restore_root.path().join("escape.txt").exists());
        assert!(!dst.exists(), "a refused restore must not create dst");
    }

    /// A crafted manifest with an absolute entry path is refused.
    #[test]
    fn malicious_absolute_path_is_refused() {
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let blob = blobs.put(b"x").unwrap();
        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries: vec![FsTreeEntry {
                path: "/tmp/should-not-write".into(),
                mode: "100644".into(),
                size: 1,
                kind: FsEntryKind::File,
                blob: Some(blob),
                link_target: None,
            }],
        };
        let tree_cid = blobs.put(&serde_json::to_vec(&tree).unwrap()).unwrap();
        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("dst");
        let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
        assert!(
            format!("{err}").contains("absolute") || format!("{err}").contains("refusing"),
            "expected absolute-path refusal, got {err}"
        );
    }

    /// A relative symlink whose target climbs above the restore root is
    /// refused.
    #[cfg(unix)]
    #[test]
    fn malicious_symlink_escape_is_refused() {
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let target_str = "../../escape";
        let blob = blobs.put(target_str.as_bytes()).unwrap();
        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries: vec![FsTreeEntry {
                path: "evil.lnk".into(),
                mode: "120777".into(),
                size: target_str.len() as u64,
                kind: FsEntryKind::Symlink,
                blob: Some(blob),
                link_target: Some(target_str.to_owned()),
            }],
        };
        let tree_cid = blobs.put(&serde_json::to_vec(&tree).unwrap()).unwrap();
        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("dst");
        let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
        assert!(
            format!("{err}").contains("escape") || format!("{err}").contains("refusing"),
            "expected symlink-escape refusal, got {err}"
        );
    }

    /// With default options an absolute symlink is skipped while the rest of
    /// the tree is restored.
    #[cfg(unix)]
    #[test]
    fn absolute_symlink_skipped_by_default_with_rest_restored() {
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let file_blob = blobs.put(b"hello\n").unwrap();
        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries: vec![
                FsTreeEntry {
                    path: "abs.lnk".into(),
                    mode: "120777".into(),
                    size: 9,
                    kind: FsEntryKind::Symlink,
                    blob: None,
                    link_target: Some("/var/log/agent".into()),
                },
                FsTreeEntry {
                    path: "src/main.py".into(),
                    mode: "100644".into(),
                    size: 6,
                    kind: FsEntryKind::File,
                    blob: Some(file_blob),
                    link_target: None,
                },
            ],
        };
        let tree_cid = blobs.put(&serde_json::to_vec(&tree).unwrap()).unwrap();
        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("out");
        restore_tree(&blobs, &tree_cid, &dst).unwrap();
        // `Path::exists` follows symlinks, and `/var/log/agent` normally does
        // not exist, so it would report `false` even if the link HAD been
        // created. `symlink_metadata` inspects the link itself.
        assert!(
            std::fs::symlink_metadata(dst.join("abs.lnk")).is_err(),
            "absolute symlink must be skipped by default"
        );
        assert_eq!(
            std::fs::read_to_string(dst.join("src/main.py")).unwrap(),
            "hello\n",
            "rest of the tree must restore normally"
        );
    }

    /// With `allow_absolute_symlinks` an absolute symlink is recreated with
    /// its recorded target.
    #[cfg(unix)]
    #[test]
    fn allow_absolute_symlinks_restores_them_verbatim() {
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries: vec![FsTreeEntry {
                path: "abs.lnk".into(),
                mode: "120777".into(),
                size: 9,
                kind: FsEntryKind::Symlink,
                blob: None,
                link_target: Some("/var/log/agent".into()),
            }],
        };
        let tree_cid = blobs.put(&serde_json::to_vec(&tree).unwrap()).unwrap();
        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("out");
        restore_tree_with_options(
            &blobs,
            &tree_cid,
            &dst,
            RestoreOptions {
                allow_absolute_symlinks: true,
            },
        )
        .unwrap();
        let link_meta = std::fs::symlink_metadata(dst.join("abs.lnk")).unwrap();
        assert!(link_meta.file_type().is_symlink());
        let target = std::fs::read_link(dst.join("abs.lnk")).unwrap();
        assert_eq!(target.to_str().unwrap(), "/var/log/agent");
    }

    /// The executable bit of a file survives capture and restore.
    #[cfg(unix)]
    #[test]
    fn executable_mode_is_restored() {
        use std::os::unix::fs::PermissionsExt as _;
        let src = TempDir::new().unwrap();
        write(src.path(), "script.sh", b"#!/bin/sh\necho hi\n");
        let scr = src.path().join("script.sh");
        std::fs::set_permissions(&scr, std::fs::Permissions::from_mode(0o755)).unwrap();
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("r");
        restore_tree(&blobs, &cid, &dst).unwrap();
        let meta = std::fs::metadata(dst.join("script.sh")).unwrap();
        assert_eq!(
            meta.permissions().mode() & 0o7777,
            0o755,
            "executable bit must survive snapshot+restore"
        );
    }

    /// `target` ignores the `target` directory but not a directory merely
    /// containing that text (`targeted`).
    #[test]
    fn ignore_matches_segments_not_substrings() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/targeted/keep.txt", b"keep");
        write(src.path(), "target/should-skip.txt", b"skip");
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let tree: FsTree = serde_json::from_slice(&blobs.get(&cid).unwrap()).unwrap();
        let paths: Vec<&str> = tree.entries.iter().map(|e| e.path.as_str()).collect();
        assert!(
            paths.contains(&"src/targeted/keep.txt"),
            "src/targeted/keep.txt must NOT be filtered (was: {paths:?})"
        );
        assert!(
            !paths.iter().any(|p| p.starts_with("target/")),
            "target/ subtree must be filtered (was: {paths:?})"
        );
    }

    /// The default extra ignores drop common Python cache and venv
    /// directories.
    #[test]
    fn default_ignores_skip_python_cache_dirs() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"print('hi')\n");
        write(
            src.path(),
            "src/__pycache__/main.cpython-313.pyc",
            b"\x03\xf3\r\n",
        );
        write(src.path(), ".pytest_cache/CACHEDIR.TAG", b"Signature: ...");
        write(src.path(), ".mypy_cache/3.13/CACHEDIR.TAG", b"...");
        write(src.path(), ".ruff_cache/0.6.0/foo", b"x");
        write(src.path(), ".venv/bin/python", b"#!/...\n");
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let tree: FsTree = serde_json::from_slice(&blobs.get(&cid).unwrap()).unwrap();
        let paths: Vec<&str> = tree.entries.iter().map(|e| e.path.as_str()).collect();
        assert!(
            paths.contains(&"src/main.py"),
            "real source file must survive: {paths:?}"
        );
        for cache_pat in [
            "__pycache__",
            ".pytest_cache",
            ".mypy_cache",
            ".ruff_cache",
            ".venv",
        ] {
            assert!(
                !paths.iter().any(|p| p.contains(cache_pat)),
                "{cache_pat} must be filtered by default; got: {paths:?}"
            );
        }
    }

    /// A glob such as `*.pyc` filters matching files at any depth.
    // The tree is created with lowercase extensions, so a case-sensitive
    // suffix check is exactly what is intended here.
    #[test]
    #[allow(clippy::case_sensitive_file_extension_comparisons)]
    fn glob_patterns_match_files_anywhere_in_tree() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"keep");
        write(src.path(), "src/legacy.pyc", b"skip-by-glob");
        write(src.path(), "build/output.pyc", b"skip-by-glob");
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let tree: FsTree = serde_json::from_slice(&blobs.get(&cid).unwrap()).unwrap();
        let paths: Vec<&str> = tree.entries.iter().map(|e| e.path.as_str()).collect();
        assert!(
            paths.contains(&"src/main.py"),
            "non-glob source must survive: {paths:?}"
        );
        assert!(
            !paths.iter().any(|p| p.ends_with(".pyc")),
            "*.pyc glob must filter every .pyc anywhere: {paths:?}"
        );
    }

    /// `new_without_default_ignores` keeps what the default extras would drop.
    #[test]
    fn opt_out_of_default_ignores_captures_caches() {
        let src = TempDir::new().unwrap();
        write(src.path(), "__pycache__/foo.pyc", b"x");
        write(src.path(), "src/main.py", b"hi");
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid = WalkFsCapture::new_without_default_ignores(src.path())
            .capture(&blobs)
            .unwrap();
        let tree: FsTree = serde_json::from_slice(&blobs.get(&cid).unwrap()).unwrap();
        let paths: Vec<&str> = tree.entries.iter().map(|e| e.path.as_str()).collect();
        assert!(
            paths.iter().any(|p| p.contains("__pycache__")),
            "without default ignores, __pycache__ must round-trip: {paths:?}"
        );
    }

    /// Each non-comment line of an ignore file becomes an ignore fragment,
    /// literal or glob.
    // The tree is created with lowercase extensions, so a case-sensitive
    // suffix check is exactly what is intended here.
    #[test]
    #[allow(clippy::case_sensitive_file_extension_comparisons)]
    fn ignore_from_file_applies_each_line() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"keep");
        write(src.path(), "secrets/api.key", b"private");
        write(src.path(), "logs/today.log", b"verbose");
        write(
            src.path(),
            ".pfignore",
            b"# project ignores\nsecrets\n*.log\n",
        );
        let blobs: Arc<dyn BlobStore> = Arc::new(MemBlobStore::new());
        let cid = WalkFsCapture::new(src.path())
            .ignore_from(src.path().join(".pfignore"))
            .unwrap()
            .capture(&blobs)
            .unwrap();
        let tree: FsTree = serde_json::from_slice(&blobs.get(&cid).unwrap()).unwrap();
        let paths: Vec<&str> = tree.entries.iter().map(|e| e.path.as_str()).collect();
        assert!(paths.contains(&"src/main.py"));
        assert!(
            !paths.iter().any(|p| p.starts_with("secrets/")),
            "secrets/ should be filtered by .pfignore: {paths:?}"
        );
        assert!(
            !paths.iter().any(|p| p.ends_with(".log")),
            "*.log glob from .pfignore should filter logs: {paths:?}"
        );
    }
}