1use pf_core::cas::BlobStore;
5use pf_core::digest::Digest256;
6
7use rayon::prelude::*;
8use serde::{Deserialize, Serialize};
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
12#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
14pub struct FsTreeEntry {
15 pub path: String,
17 pub mode: String,
20 pub size: u64,
22 pub kind: FsEntryKind,
24 #[serde(default, skip_serializing_if = "Option::is_none")]
27 pub blob: Option<Digest256>,
28 #[serde(default, skip_serializing_if = "Option::is_none")]
30 pub link_target: Option<String>,
31}
32
33#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
35#[serde(rename_all = "snake_case")]
36pub enum FsEntryKind {
37 File,
39 Dir,
41 Symlink,
43}
44
45#[derive(Clone, Debug, Serialize, Deserialize)]
47pub struct FsTree {
48 pub kind: String,
50 pub entries: Vec<FsTreeEntry>,
52}
53
54pub struct WalkFsCapture {
61 root: PathBuf,
62 use_apfs_clone: bool,
63 follow_symlinks: bool,
64 ignore: Vec<String>,
65 ignore_globs: Vec<globset::GlobMatcher>,
69}
70
/// Ignore fragments that `WalkFsCapture::new` adds on top of the base list.
///
/// Entries without glob metacharacters are matched as whole path segments;
/// the `*.…` entries are compiled as globs.
const DEFAULT_EXTRA_IGNORES: &[&str] = &[
    // Tool caches and environments.
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    ".tox",
    ".coverage",
    ".venv",
    // Desktop metadata.
    ".DS_Store",
    // Compiled Python artefacts.
    "*.pyc",
    "*.pyo",
];
92
93impl WalkFsCapture {
94 pub fn new(root: impl AsRef<Path>) -> Self {
96 let mut ignore: Vec<String> = vec![
97 ".git/objects".into(),
98 "target".into(),
99 "node_modules".into(),
100 ".pfcid".into(),
104 ];
105 for extra in DEFAULT_EXTRA_IGNORES {
106 ignore.push((*extra).to_owned());
107 }
108 let ignore_globs = compile_globs(&ignore);
109 Self {
110 root: root.as_ref().to_path_buf(),
111 use_apfs_clone: false,
112 follow_symlinks: false,
113 ignore,
114 ignore_globs,
115 }
116 }
117
118 pub fn new_without_default_ignores(root: impl AsRef<Path>) -> Self {
125 let ignore: Vec<String> = vec![
126 ".git/objects".into(),
127 "target".into(),
128 "node_modules".into(),
129 ".pfcid".into(),
130 ];
131 let ignore_globs = compile_globs(&ignore);
132 Self {
133 root: root.as_ref().to_path_buf(),
134 use_apfs_clone: false,
135 follow_symlinks: false,
136 ignore,
137 ignore_globs,
138 }
139 }
140
141 #[must_use]
147 pub fn use_apfs_clone(mut self, enable: bool) -> Self {
148 self.use_apfs_clone = enable;
149 self
150 }
151
152 #[must_use]
155 pub fn follow_symlinks(mut self, enable: bool) -> Self {
156 self.follow_symlinks = enable;
157 self
158 }
159
160 #[must_use]
171 pub fn ignore(mut self, fragment: impl Into<String>) -> Self {
172 let entry: String = fragment.into();
173 if has_glob_chars(&entry)
174 && let Ok(g) = globset::Glob::new(&entry)
175 {
176 self.ignore_globs.push(g.compile_matcher());
177 }
178 self.ignore.push(entry);
179 self
180 }
181
182 pub fn ignore_from(mut self, path: impl AsRef<Path>) -> std::io::Result<Self> {
195 let path = path.as_ref();
196 if !path.exists() {
197 return Ok(self);
198 }
199 let content = std::fs::read_to_string(path)?;
200 for raw in content.lines() {
201 let line = raw.trim();
202 if line.is_empty() || line.starts_with('#') {
203 continue;
204 }
205 if line.starts_with('!') {
206 tracing::warn!(
207 "ignoring gitignore negation in {}: {} (negation not yet supported in v1.0.13)",
208 path.display(),
209 line
210 );
211 continue;
212 }
213 let trimmed = line.trim_start_matches('/').trim_end_matches('/');
214 if trimmed.is_empty() {
215 continue;
216 }
217 self = self.ignore(trimmed);
218 }
219 Ok(self)
220 }
221
222 pub fn capture(&self, blobs: &Arc<dyn BlobStore>) -> pf_core::Result<Digest256> {
224 let walk_root: PathBuf = if self.use_apfs_clone && cfg!(target_os = "macos") {
227 apfs_clone(&self.root).unwrap_or_else(|_| self.root.clone())
228 } else {
229 self.root.clone()
230 };
231
232 let mut raw: Vec<walkdir::DirEntry> = walkdir::WalkDir::new(&walk_root)
234 .follow_links(self.follow_symlinks)
235 .into_iter()
236 .filter_entry(|e| {
237 let rel = e.path().strip_prefix(&walk_root).unwrap_or(e.path());
251 !path_matches_any_ignore(e.path(), &self.ignore)
252 && !path_matches_any_glob(rel, &self.ignore_globs)
253 })
254 .filter_map(std::result::Result::ok)
255 .collect();
256
257 raw.retain(|e| e.path() != walk_root.as_path());
259
260 raw.sort_by(|a, b| a.path().cmp(b.path()));
262
263 let entries: Vec<FsTreeEntry> = raw
265 .par_iter()
266 .map(|de| -> pf_core::Result<FsTreeEntry> {
267 let abs = de.path();
268 let rel = abs.strip_prefix(&walk_root).unwrap_or(abs);
269 let rel_str = rel.to_string_lossy().replace('\\', "/");
270 let meta = de
271 .metadata()
272 .map_err(|e| std::io::Error::other(e.to_string()))?;
273 let mode = unix_mode_string(&meta);
274
275 if meta.file_type().is_dir() {
276 return Ok(FsTreeEntry {
277 path: rel_str,
278 mode,
279 size: 0,
280 kind: FsEntryKind::Dir,
281 blob: None,
282 link_target: None,
283 });
284 }
285 if meta.file_type().is_symlink() {
286 let target = std::fs::read_link(abs)?;
287 let target_str = target.to_string_lossy().to_string();
288 let blob = blobs.put(target_str.as_bytes())?;
289 return Ok(FsTreeEntry {
290 path: rel_str,
291 mode,
292 size: target_str.len() as u64,
293 kind: FsEntryKind::Symlink,
294 blob: Some(blob),
295 link_target: Some(target_str),
296 });
297 }
298 let bytes = std::fs::read(abs)?;
300 let size = bytes.len() as u64;
301 let digest = blobs.put(&bytes)?;
302 Ok(FsTreeEntry {
303 path: rel_str,
304 mode,
305 size,
306 kind: FsEntryKind::File,
307 blob: Some(digest),
308 link_target: None,
309 })
310 })
311 .collect::<pf_core::Result<Vec<_>>>()?;
312
313 let tree = FsTree {
314 kind: "fs.tree.v1".into(),
315 entries,
316 };
317 let json = serde_json::to_vec(&tree)?;
318 blobs.put(&json)
319 }
320}
321
/// Switches that tune [`restore_tree_with_options`].
///
/// The default leaves every switch off.
#[derive(Clone, Copy, Debug, Default)]
pub struct RestoreOptions {
    /// When `true`, symlinks with an absolute target are recreated verbatim;
    /// when `false` they are skipped with a warning on stderr.
    pub allow_absolute_symlinks: bool,
}
346
347pub fn restore_tree(
355 blobs: &Arc<dyn BlobStore>,
356 tree_digest: &Digest256,
357 dst: impl AsRef<Path>,
358) -> pf_core::Result<()> {
359 restore_tree_with_options(blobs, tree_digest, dst, RestoreOptions::default())
360}
361
362pub fn restore_tree_with_options(
365 blobs: &Arc<dyn BlobStore>,
366 tree_digest: &Digest256,
367 dst: impl AsRef<Path>,
368 opts: RestoreOptions,
369) -> pf_core::Result<()> {
370 let dst = dst.as_ref();
371 if dst.exists() {
372 return Err(pf_core::Error::Io(std::io::Error::new(
373 std::io::ErrorKind::AlreadyExists,
374 format!(
375 "restore_tree refuses to overwrite existing path {}",
376 dst.display()
377 ),
378 )));
379 }
380 let tree_bytes = blobs.get(tree_digest)?;
381 let tree: FsTree = serde_json::from_slice(&tree_bytes)?;
382 if tree.kind != "fs.tree.v1" {
383 return Err(pf_core::Error::Integrity(format!(
384 "expected fs.tree.v1, got {}",
385 tree.kind
386 )));
387 }
388
389 let parent = dst.parent().unwrap_or_else(|| Path::new("."));
391 std::fs::create_dir_all(parent)?;
392 let staging = parent.join(format!(
393 ".pf-restore.{}.{}",
394 std::process::id(),
395 chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
396 ));
397 std::fs::create_dir(&staging)?;
398
399 for e in tree
401 .entries
402 .iter()
403 .filter(|e| matches!(e.kind, FsEntryKind::Dir))
404 {
405 let safe = safe_join(&staging, &e.path)?;
406 std::fs::create_dir_all(&safe)?;
407 apply_mode(&safe, &e.mode)?;
408 }
409 for e in &tree.entries {
411 let p = safe_join(&staging, &e.path)?;
412 match e.kind {
413 FsEntryKind::Dir => {}
414 FsEntryKind::File => {
415 let blob = e.blob.as_ref().ok_or_else(|| {
416 pf_core::Error::Integrity(format!("file entry {} missing blob", e.path))
417 })?;
418 let bytes = blobs.get(blob)?;
419 if let Some(parent) = p.parent() {
420 std::fs::create_dir_all(parent)?;
421 }
422 std::fs::write(&p, bytes)?;
423 apply_mode(&p, &e.mode)?;
424 }
425 FsEntryKind::Symlink => {
426 let raw_target = e.link_target.as_ref().ok_or_else(|| {
427 pf_core::Error::Integrity(format!(
428 "symlink entry {} missing link_target",
429 e.path
430 ))
431 })?;
432 if Path::new(raw_target).is_absolute() {
443 if opts.allow_absolute_symlinks {
444 if let Some(parent) = p.parent() {
445 std::fs::create_dir_all(parent)?;
446 }
447 #[cfg(unix)]
448 std::os::unix::fs::symlink(raw_target, &p)?;
449 #[cfg(not(unix))]
450 std::fs::write(&p, raw_target.as_bytes())?;
451 } else {
452 eprintln!(
453 "warning: skipped absolute symlink {} -> {} \
454 (pass --allow-absolute-symlinks to restore)",
455 e.path, raw_target
456 );
457 }
458 continue;
459 }
460 check_symlink_target(&staging, &p, raw_target)?;
461 if let Some(parent) = p.parent() {
462 std::fs::create_dir_all(parent)?;
463 }
464 #[cfg(unix)]
465 std::os::unix::fs::symlink(raw_target, &p)?;
466 #[cfg(not(unix))]
467 std::fs::write(&p, raw_target.as_bytes())?;
468 }
469 }
470 }
471
472 std::fs::rename(&staging, dst)?;
474 Ok(())
475}
476
/// Returns `true` when `entry` contains one of the glob metacharacters `*`,
/// `?` or `[`.
fn has_glob_chars(entry: &str) -> bool {
    entry.chars().any(|ch| matches!(ch, '*' | '?' | '['))
}
496
497fn compile_globs(ignores: &[String]) -> Vec<globset::GlobMatcher> {
504 let mut out = Vec::new();
505 for ign in ignores {
506 if !has_glob_chars(ign) {
507 continue;
508 }
509 match globset::Glob::new(ign) {
510 Ok(g) => out.push(g.compile_matcher()),
511 Err(e) => tracing::warn!("ignore: invalid glob {ign:?}: {e}"),
512 }
513 }
514 out
515}
516
517fn path_matches_any_glob(relative_path: &Path, globs: &[globset::GlobMatcher]) -> bool {
521 if globs.is_empty() {
522 return false;
523 }
524 for g in globs {
525 if g.is_match(relative_path) {
526 return true;
527 }
528 if let Some(name) = relative_path.file_name()
532 && g.is_match(Path::new(name))
533 {
534 return true;
535 }
536 }
537 false
538}
539
/// Returns `true` if any entry of `ignores` occurs in `path` as a run of
/// whole, consecutive path segments.
///
/// An ignore entry is split on `/`, and empty pieces are dropped; an entry
/// with no pieces never matches. Only normal, UTF-8 path components take part
/// in the comparison.
fn path_matches_any_ignore(path: &Path, ignores: &[String]) -> bool {
    let segments: Vec<&str> = path
        .components()
        .filter_map(|component| {
            if let std::path::Component::Normal(name) = component {
                name.to_str()
            } else {
                None
            }
        })
        .collect();

    ignores.iter().any(|pattern| {
        let needle: Vec<&str> = pattern.split('/').filter(|part| !part.is_empty()).collect();
        // The emptiness check must come first: `windows(0)` panics.
        !needle.is_empty()
            && segments
                .windows(needle.len())
                .any(|window| window == needle.as_slice())
    })
}
564
565fn safe_join(root: &Path, relative: &str) -> pf_core::Result<PathBuf> {
571 let candidate = Path::new(relative);
572 if candidate.is_absolute() {
573 return Err(pf_core::Error::Integrity(format!(
574 "fs.tree entry has absolute path {relative:?} — refusing"
575 )));
576 }
577 for comp in candidate.components() {
580 match comp {
581 std::path::Component::ParentDir => {
582 return Err(pf_core::Error::Integrity(format!(
583 "fs.tree entry path {relative:?} contains `..` — refusing"
584 )));
585 }
586 std::path::Component::RootDir | std::path::Component::Prefix(_) => {
587 return Err(pf_core::Error::Integrity(format!(
588 "fs.tree entry path {relative:?} has root/prefix — refusing"
589 )));
590 }
591 std::path::Component::CurDir | std::path::Component::Normal(_) => {}
592 }
593 }
594 Ok(root.join(candidate))
595}
596
597fn check_symlink_target(root: &Path, link_path: &Path, target: &str) -> pf_core::Result<()> {
603 let target_path = Path::new(target);
604 if target_path.is_absolute() {
605 return Err(pf_core::Error::Integrity(format!(
606 "symlink target {target:?} is absolute — refusing"
607 )));
608 }
609 let link_depth = link_path
613 .strip_prefix(root)
614 .ok()
615 .map_or(0, |p| p.components().count().saturating_sub(1));
616 let mut depth = isize::try_from(link_depth).unwrap_or(isize::MAX);
617 for comp in target_path.components() {
618 match comp {
619 std::path::Component::ParentDir => depth -= 1,
620 std::path::Component::Normal(_) => depth += 1,
621 std::path::Component::CurDir => {}
622 std::path::Component::RootDir | std::path::Component::Prefix(_) => {
623 return Err(pf_core::Error::Integrity(format!(
624 "symlink target {target:?} has root/prefix — refusing"
625 )));
626 }
627 }
628 if depth < 0 {
629 return Err(pf_core::Error::Integrity(format!(
630 "symlink target {target:?} escapes restore root — refusing"
631 )));
632 }
633 }
634 Ok(())
635}
636
637#[cfg(unix)]
642fn apply_mode(path: &Path, mode: &str) -> pf_core::Result<()> {
643 use std::os::unix::fs::PermissionsExt as _;
644 let raw = u32::from_str_radix(mode, 8).unwrap_or(0o644);
645 let perm = std::fs::Permissions::from_mode(raw & 0o7777);
646 let meta = std::fs::symlink_metadata(path)?;
649 if meta.file_type().is_symlink() {
650 return Ok(());
651 }
652 std::fs::set_permissions(path, perm)?;
653 Ok(())
654}
655
656#[cfg(not(unix))]
657fn apply_mode(_path: &Path, _mode: &str) -> pf_core::Result<()> {
658 Ok(())
659}
660
/// Clones `src` into a fresh directory under the system temp dir by running
/// `cp -c -R`, and returns the path of the clone.
///
/// # Errors
///
/// Fails if `cp` cannot be started or exits unsuccessfully. In the latter
/// case whatever `cp` had already copied is removed again.
#[cfg(target_os = "macos")]
fn apfs_clone(src: &Path) -> std::io::Result<PathBuf> {
    use std::process::Command;

    let dst = std::env::temp_dir().join(format!(
        "pf-apfs-clone.{}.{}",
        std::process::id(),
        chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
    ));
    let status = Command::new("cp")
        .args(["-c", "-R"])
        .arg(src)
        .arg(&dst)
        .status()?;
    if !status.success() {
        // `cp` may have copied part of the tree before failing. The caller
        // never learns about `dst` on this path, so clean up here. The result
        // is ignored on purpose: `dst` may not exist at all.
        let _ = std::fs::remove_dir_all(&dst);
        return Err(std::io::Error::other(format!(
            "cp -c -R exit status: {status:?}"
        )));
    }
    Ok(dst)
}

/// Stand-in for platforms other than macOS: always fails.
#[cfg(not(target_os = "macos"))]
fn apfs_clone(_src: &Path) -> std::io::Result<PathBuf> {
    Err(std::io::Error::other("APFS clone only available on macOS"))
}
688
/// Formats the permission bits of `meta` (masked with `0o7777`) as octal,
/// zero-padded to at least four digits.
#[cfg(unix)]
fn unix_mode_string(meta: &std::fs::Metadata) -> String {
    use std::os::unix::fs::PermissionsExt;

    let bits = meta.permissions().mode() & 0o7777;
    format!("{bits:04o}")
}

/// Non-Unix approximation: `0444` for read-only entries, `0644` otherwise.
#[cfg(not(unix))]
fn unix_mode_string(meta: &std::fs::Metadata) -> String {
    let approximate = if meta.permissions().readonly() {
        "0444"
    } else {
        "0644"
    };
    approximate.into()
}
704
#[cfg(test)]
mod tests {
    use super::*;
    use pf_core::cas::MemBlobStore;
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Writes `contents` to `rel` under `dir`, creating missing parents.
    fn write(dir: &Path, rel: &str, contents: &[u8]) {
        let target = dir.join(rel);
        if let Some(parent) = target.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(&target, contents).unwrap();
    }

    /// Fresh in-memory blob store behind the trait object the API expects.
    fn mem_store() -> Arc<dyn BlobStore> {
        Arc::new(MemBlobStore::new())
    }

    /// Fetches and decodes the manifest stored under `cid`.
    fn load_tree(blobs: &Arc<dyn BlobStore>, cid: &Digest256) -> FsTree {
        serde_json::from_slice(&blobs.get(cid).unwrap()).unwrap()
    }

    /// Runs `capture` and returns the decoded manifest.
    fn capture_tree(capture: &WalkFsCapture, blobs: &Arc<dyn BlobStore>) -> FsTree {
        let cid = capture.capture(blobs).unwrap();
        load_tree(blobs, &cid)
    }

    /// All entry paths of `tree`, in manifest order.
    fn paths_of(tree: &FsTree) -> Vec<&str> {
        tree.entries.iter().map(|e| e.path.as_str()).collect()
    }

    /// Stores a hand-built `fs.tree.v1` manifest and returns its digest.
    fn store_tree(blobs: &Arc<dyn BlobStore>, entries: Vec<FsTreeEntry>) -> Digest256 {
        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries,
        };
        blobs.put(&serde_json::to_vec(&tree).unwrap()).unwrap()
    }

    /// Hand-built file entry with the mode string the fixtures use.
    fn file_entry(path: &str, size: u64, blob: Digest256) -> FsTreeEntry {
        FsTreeEntry {
            path: path.into(),
            mode: "100644".into(),
            size,
            kind: FsEntryKind::File,
            blob: Some(blob),
            link_target: None,
        }
    }

    /// Hand-built symlink entry with the mode string the fixtures use.
    #[cfg(unix)]
    fn symlink_entry(path: &str, size: u64, blob: Option<Digest256>, target: &str) -> FsTreeEntry {
        FsTreeEntry {
            path: path.into(),
            mode: "120777".into(),
            size,
            kind: FsEntryKind::Symlink,
            blob,
            link_target: Some(target.to_owned()),
        }
    }

    #[test]
    fn round_trip_small_tree() {
        let src = TempDir::new().unwrap();
        write(src.path(), "a.txt", b"hello");
        write(src.path(), "sub/b.txt", b"world");
        write(src.path(), "sub/c.bin", &[0xABu8; 8 * 1024]);

        let blobs = mem_store();
        let tree_cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("restored");
        restore_tree(&blobs, &tree_cid, &dst).unwrap();

        assert_eq!(std::fs::read(dst.join("a.txt")).unwrap(), b"hello");
        assert_eq!(std::fs::read(dst.join("sub/b.txt")).unwrap(), b"world");
        let big = std::fs::read(dst.join("sub/c.bin")).unwrap();
        assert_eq!(big.len(), 8 * 1024);
    }

    #[test]
    fn capture_is_deterministic() {
        let src = TempDir::new().unwrap();
        write(src.path(), "a.txt", b"hello");
        write(src.path(), "b.txt", b"world");
        let blobs = mem_store();
        let first = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let second = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        assert_eq!(
            first, second,
            "capture of identical tree must be byte-identical"
        );
    }

    #[test]
    fn ignored_paths_are_skipped() {
        let src = TempDir::new().unwrap();
        write(src.path(), "kept.txt", b"keep");
        write(src.path(), "node_modules/dep/index.js", b"skip");
        let blobs = mem_store();
        let tree = capture_tree(&WalkFsCapture::new(src.path()), &blobs);
        assert!(tree.entries.iter().any(|e| e.path == "kept.txt"));
        assert!(
            tree.entries
                .iter()
                .all(|e| !e.path.starts_with("node_modules"))
        );
    }

    #[cfg(unix)]
    #[test]
    fn symlinks_are_captured_as_symlinks() {
        let src = TempDir::new().unwrap();
        write(src.path(), "real.txt", b"data");
        std::os::unix::fs::symlink("real.txt", src.path().join("link.txt")).unwrap();
        let blobs = mem_store();
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("r");
        restore_tree(&blobs, &cid, &dst).unwrap();

        let restored_link = dst.join("link.txt");
        let meta = std::fs::symlink_metadata(&restored_link).unwrap();
        assert!(meta.file_type().is_symlink());
        let target = std::fs::read_link(&restored_link).unwrap();
        assert_eq!(target.to_str().unwrap(), "real.txt");
    }

    #[test]
    fn malicious_relative_path_traversal_is_refused() {
        let blobs = mem_store();
        let payload = b"PWNED";
        let blob = blobs.put(payload).unwrap();
        let tree_cid = store_tree(
            &blobs,
            vec![file_entry("../../escape.txt", payload.len() as u64, blob)],
        );

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("dst");
        let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("`..`") || message.contains("refusing"),
            "expected path-traversal refusal, got {err}"
        );
        assert!(!restore_root.path().join("escape.txt").exists());
    }

    #[test]
    fn malicious_absolute_path_is_refused() {
        let blobs = mem_store();
        let blob = blobs.put(b"x").unwrap();
        let tree_cid = store_tree(&blobs, vec![file_entry("/tmp/should-not-write", 1, blob)]);

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("dst");
        let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("absolute") || message.contains("refusing"),
            "expected absolute-path refusal, got {err}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn malicious_symlink_escape_is_refused() {
        let blobs = mem_store();
        let target_str = "../../escape";
        let blob = blobs.put(target_str.as_bytes()).unwrap();
        let tree_cid = store_tree(
            &blobs,
            vec![symlink_entry(
                "evil.lnk",
                target_str.len() as u64,
                Some(blob),
                target_str,
            )],
        );

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("dst");
        let err = restore_tree(&blobs, &tree_cid, &dst).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("escape") || message.contains("refusing"),
            "expected symlink-escape refusal, got {err}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn absolute_symlink_skipped_by_default_with_rest_restored() {
        let blobs = mem_store();
        let file_blob = blobs.put(b"hello\n").unwrap();
        let tree_cid = store_tree(
            &blobs,
            vec![
                symlink_entry("abs.lnk", 9, None, "/var/log/agent"),
                file_entry("src/main.py", 6, file_blob),
            ],
        );

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("out");
        restore_tree(&blobs, &tree_cid, &dst).unwrap();

        assert!(
            !dst.join("abs.lnk").exists(),
            "absolute symlink must be skipped by default"
        );
        assert_eq!(
            std::fs::read_to_string(dst.join("src/main.py")).unwrap(),
            "hello\n",
            "rest of the tree must restore normally"
        );
    }

    #[cfg(unix)]
    #[test]
    fn allow_absolute_symlinks_restores_them_verbatim() {
        let blobs = mem_store();
        let tree_cid = store_tree(
            &blobs,
            vec![symlink_entry("abs.lnk", 9, None, "/var/log/agent")],
        );

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("out");
        let opts = RestoreOptions {
            allow_absolute_symlinks: true,
        };
        restore_tree_with_options(&blobs, &tree_cid, &dst, opts).unwrap();

        let link = dst.join("abs.lnk");
        let link_meta = std::fs::symlink_metadata(&link).unwrap();
        assert!(link_meta.file_type().is_symlink());
        let target = std::fs::read_link(&link).unwrap();
        assert_eq!(target.to_str().unwrap(), "/var/log/agent");
    }

    #[cfg(unix)]
    #[test]
    fn executable_mode_is_restored() {
        use std::os::unix::fs::PermissionsExt as _;

        let src = TempDir::new().unwrap();
        write(src.path(), "script.sh", b"#!/bin/sh\necho hi\n");
        std::fs::set_permissions(
            src.path().join("script.sh"),
            std::fs::Permissions::from_mode(0o755),
        )
        .unwrap();
        let blobs = mem_store();
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("r");
        restore_tree(&blobs, &cid, &dst).unwrap();

        let restored_bits = std::fs::metadata(dst.join("script.sh"))
            .unwrap()
            .permissions()
            .mode()
            & 0o7777;
        assert_eq!(
            restored_bits, 0o755,
            "executable bit must survive snapshot+restore"
        );
    }

    #[test]
    fn ignore_matches_segments_not_substrings() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/targeted/keep.txt", b"keep");
        write(src.path(), "target/should-skip.txt", b"skip");
        let blobs = mem_store();
        let tree = capture_tree(&WalkFsCapture::new(src.path()), &blobs);
        let paths = paths_of(&tree);
        assert!(
            paths.contains(&"src/targeted/keep.txt"),
            "src/targeted/keep.txt must NOT be filtered (was: {paths:?})"
        );
        assert!(
            paths.iter().all(|p| !p.starts_with("target/")),
            "target/ subtree must be filtered (was: {paths:?})"
        );
    }

    #[test]
    fn default_ignores_skip_python_cache_dirs() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"print('hi')\n");
        write(
            src.path(),
            "src/__pycache__/main.cpython-313.pyc",
            b"\x03\xf3\r\n",
        );
        write(src.path(), ".pytest_cache/CACHEDIR.TAG", b"Signature: ...");
        write(src.path(), ".mypy_cache/3.13/CACHEDIR.TAG", b"...");
        write(src.path(), ".ruff_cache/0.6.0/foo", b"x");
        write(src.path(), ".venv/bin/python", b"#!/...\n");
        let blobs = mem_store();
        let tree = capture_tree(&WalkFsCapture::new(src.path()), &blobs);
        let paths = paths_of(&tree);

        assert!(
            paths.contains(&"src/main.py"),
            "real source file must survive: {paths:?}"
        );
        let cache_patterns = [
            "__pycache__",
            ".pytest_cache",
            ".mypy_cache",
            ".ruff_cache",
            ".venv",
        ];
        for cache_pat in cache_patterns {
            assert!(
                paths.iter().all(|p| !p.contains(cache_pat)),
                "{cache_pat} must be filtered by default; got: {paths:?}"
            );
        }
    }

    #[test]
    // The fixtures use lower-case extensions only, so a case-sensitive
    // suffix check is exactly what is meant here.
    #[allow(clippy::case_sensitive_file_extension_comparisons)]
    fn glob_patterns_match_files_anywhere_in_tree() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"keep");
        write(src.path(), "src/legacy.pyc", b"skip-by-glob");
        write(src.path(), "build/output.pyc", b"skip-by-glob");
        let blobs = mem_store();
        let tree = capture_tree(&WalkFsCapture::new(src.path()), &blobs);
        let paths = paths_of(&tree);
        assert!(
            paths.contains(&"src/main.py"),
            "non-glob source must survive: {paths:?}"
        );
        assert!(
            paths.iter().all(|p| !p.ends_with(".pyc")),
            "*.pyc glob must filter every .pyc anywhere: {paths:?}"
        );
    }

    #[test]
    fn opt_out_of_default_ignores_captures_caches() {
        let src = TempDir::new().unwrap();
        write(src.path(), "__pycache__/foo.pyc", b"x");
        write(src.path(), "src/main.py", b"hi");
        let blobs = mem_store();
        let tree = capture_tree(
            &WalkFsCapture::new_without_default_ignores(src.path()),
            &blobs,
        );
        let paths = paths_of(&tree);
        assert!(
            paths.iter().any(|p| p.contains("__pycache__")),
            "without default ignores, __pycache__ must round-trip: {paths:?}"
        );
    }

    #[test]
    // The fixtures use lower-case extensions only, so a case-sensitive
    // suffix check is exactly what is meant here.
    #[allow(clippy::case_sensitive_file_extension_comparisons)]
    fn ignore_from_file_applies_each_line() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"keep");
        write(src.path(), "secrets/api.key", b"private");
        write(src.path(), "logs/today.log", b"verbose");
        write(
            src.path(),
            ".pfignore",
            b"# project ignores\nsecrets\n*.log\n",
        );
        let blobs = mem_store();
        let capture = WalkFsCapture::new(src.path())
            .ignore_from(src.path().join(".pfignore"))
            .unwrap();
        let tree = capture_tree(&capture, &blobs);
        let paths = paths_of(&tree);
        assert!(paths.contains(&"src/main.py"));
        assert!(
            paths.iter().all(|p| !p.starts_with("secrets/")),
            "secrets/ should be filtered by .pfignore: {paths:?}"
        );
        assert!(
            paths.iter().all(|p| !p.ends_with(".log")),
            "*.log glob from .pfignore should filter logs: {paths:?}"
        );
    }
}