1use pf_core::cas::BlobStore;
5use pf_core::digest::Digest256;
6
7use rayon::prelude::*;
8use serde::{Deserialize, Serialize};
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
12#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
14pub struct FsTreeEntry {
15 pub path: String,
17 pub mode: String,
20 pub size: u64,
22 pub kind: FsEntryKind,
24 #[serde(default, skip_serializing_if = "Option::is_none")]
27 pub blob: Option<Digest256>,
28 #[serde(default, skip_serializing_if = "Option::is_none")]
30 pub link_target: Option<String>,
31}
32
33#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
35#[serde(rename_all = "snake_case")]
36pub enum FsEntryKind {
37 File,
39 Dir,
41 Symlink,
43}
44
45#[derive(Clone, Debug, Serialize, Deserialize)]
47pub struct FsTree {
48 pub kind: String,
50 pub entries: Vec<FsTreeEntry>,
52}
53
54pub struct WalkFsCapture {
61 root: PathBuf,
62 use_apfs_clone: bool,
63 follow_symlinks: bool,
64 ignore: Vec<String>,
65 ignore_globs: Vec<globset::GlobMatcher>,
69}
70
/// Additional ignore fragments, mostly Python tooling caches and OS clutter.
///
/// Entries containing glob characters are glob patterns; the rest are literal
/// names.
const DEFAULT_EXTRA_IGNORES: &[&str] = &[
    // Cache and environment directories.
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    ".tox",
    ".coverage",
    ".venv",
    // Finder metadata.
    ".DS_Store",
    // Compiled Python artefacts.
    "*.pyc",
    "*.pyo",
];
92
93impl WalkFsCapture {
94 pub fn new(root: impl AsRef<Path>) -> Self {
96 let mut ignore: Vec<String> = vec![
97 ".git/objects".into(),
98 "target".into(),
99 "node_modules".into(),
100 ".pfcid".into(),
104 ];
105 for extra in DEFAULT_EXTRA_IGNORES {
106 ignore.push((*extra).to_owned());
107 }
108 let ignore_globs = compile_globs(&ignore);
109 Self {
110 root: root.as_ref().to_path_buf(),
111 use_apfs_clone: false,
112 follow_symlinks: false,
113 ignore,
114 ignore_globs,
115 }
116 }
117
118 pub fn new_without_default_ignores(root: impl AsRef<Path>) -> Self {
125 let ignore: Vec<String> = vec![
126 ".git/objects".into(),
127 "target".into(),
128 "node_modules".into(),
129 ".pfcid".into(),
130 ];
131 let ignore_globs = compile_globs(&ignore);
132 Self {
133 root: root.as_ref().to_path_buf(),
134 use_apfs_clone: false,
135 follow_symlinks: false,
136 ignore,
137 ignore_globs,
138 }
139 }
140
141 #[must_use]
147 pub fn use_apfs_clone(mut self, enable: bool) -> Self {
148 self.use_apfs_clone = enable;
149 self
150 }
151
152 #[must_use]
155 pub fn follow_symlinks(mut self, enable: bool) -> Self {
156 self.follow_symlinks = enable;
157 self
158 }
159
160 #[must_use]
171 pub fn ignore(mut self, fragment: impl Into<String>) -> Self {
172 let entry: String = fragment.into();
173 if has_glob_chars(&entry)
174 && let Ok(g) = globset::Glob::new(&entry)
175 {
176 self.ignore_globs.push(g.compile_matcher());
177 }
178 self.ignore.push(entry);
179 self
180 }
181
182 pub fn ignore_from(mut self, path: impl AsRef<Path>) -> std::io::Result<Self> {
195 let path = path.as_ref();
196 if !path.exists() {
197 return Ok(self);
198 }
199 let content = std::fs::read_to_string(path)?;
200 for raw in content.lines() {
201 let line = raw.trim();
202 if line.is_empty() || line.starts_with('#') {
203 continue;
204 }
205 if line.starts_with('!') {
206 tracing::warn!(
207 "ignoring gitignore negation in {}: {} (negation not yet supported in v1.0.13)",
208 path.display(),
209 line
210 );
211 continue;
212 }
213 let trimmed = line.trim_start_matches('/').trim_end_matches('/');
214 if trimmed.is_empty() {
215 continue;
216 }
217 self = self.ignore(trimmed);
218 }
219 Ok(self)
220 }
221
222 pub fn capture(&self, blobs: &Arc<dyn BlobStore>) -> pf_core::Result<Digest256> {
224 let walk_root: PathBuf = if self.use_apfs_clone && cfg!(target_os = "macos") {
227 apfs_clone(&self.root).unwrap_or_else(|_| self.root.clone())
228 } else {
229 self.root.clone()
230 };
231
232 let mut raw: Vec<walkdir::DirEntry> = walkdir::WalkDir::new(&walk_root)
234 .follow_links(self.follow_symlinks)
235 .into_iter()
236 .filter_entry(|e| {
237 let rel = e.path().strip_prefix(&walk_root).unwrap_or(e.path());
251 !path_matches_any_ignore(e.path(), &self.ignore)
252 && !path_matches_any_glob(rel, &self.ignore_globs)
253 })
254 .filter_map(std::result::Result::ok)
255 .collect();
256
257 raw.retain(|e| e.path() != walk_root.as_path());
259
260 raw.sort_by(|a, b| a.path().cmp(b.path()));
262
263 let entries: Vec<FsTreeEntry> = raw
265 .par_iter()
266 .map(|de| -> pf_core::Result<FsTreeEntry> {
267 let abs = de.path();
268 let rel = abs.strip_prefix(&walk_root).unwrap_or(abs);
269 let rel_str = rel.to_string_lossy().replace('\\', "/");
270 let meta = de
271 .metadata()
272 .map_err(|e| std::io::Error::other(e.to_string()))?;
273 let mode = unix_mode_string(&meta);
274
275 if meta.file_type().is_dir() {
276 return Ok(FsTreeEntry {
277 path: rel_str,
278 mode,
279 size: 0,
280 kind: FsEntryKind::Dir,
281 blob: None,
282 link_target: None,
283 });
284 }
285 if meta.file_type().is_symlink() {
286 let target = std::fs::read_link(abs)?;
287 let target_str = target.to_string_lossy().to_string();
288 let blob = blobs.put(target_str.as_bytes())?;
289 return Ok(FsTreeEntry {
290 path: rel_str,
291 mode,
292 size: target_str.len() as u64,
293 kind: FsEntryKind::Symlink,
294 blob: Some(blob),
295 link_target: Some(target_str),
296 });
297 }
298 let bytes = std::fs::read(abs)?;
300 let size = bytes.len() as u64;
301 let digest = blobs.put(&bytes)?;
302 Ok(FsTreeEntry {
303 path: rel_str,
304 mode,
305 size,
306 kind: FsEntryKind::File,
307 blob: Some(digest),
308 link_target: None,
309 })
310 })
311 .collect::<pf_core::Result<Vec<_>>>()?;
312
313 let tree = FsTree {
314 kind: "fs.tree.v1".into(),
315 entries,
316 };
317 let json = serde_json::to_vec(&tree)?;
318 blobs.put(&json)
319 }
320}
321
322pub fn restore_tree(
328 blobs: &Arc<dyn BlobStore>,
329 tree_digest: &Digest256,
330 dst: impl AsRef<Path>,
331) -> pf_core::Result<()> {
332 let dst = dst.as_ref();
333 if dst.exists() {
334 return Err(pf_core::Error::Io(std::io::Error::new(
335 std::io::ErrorKind::AlreadyExists,
336 format!(
337 "restore_tree refuses to overwrite existing path {}",
338 dst.display()
339 ),
340 )));
341 }
342 let tree_bytes = blobs.get(tree_digest)?;
343 let tree: FsTree = serde_json::from_slice(&tree_bytes)?;
344 if tree.kind != "fs.tree.v1" {
345 return Err(pf_core::Error::Integrity(format!(
346 "expected fs.tree.v1, got {}",
347 tree.kind
348 )));
349 }
350
351 let parent = dst.parent().unwrap_or_else(|| Path::new("."));
353 std::fs::create_dir_all(parent)?;
354 let staging = parent.join(format!(
355 ".pf-restore.{}.{}",
356 std::process::id(),
357 chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
358 ));
359 std::fs::create_dir(&staging)?;
360
361 for e in tree
363 .entries
364 .iter()
365 .filter(|e| matches!(e.kind, FsEntryKind::Dir))
366 {
367 let safe = safe_join(&staging, &e.path)?;
368 std::fs::create_dir_all(&safe)?;
369 apply_mode(&safe, &e.mode)?;
370 }
371 for e in &tree.entries {
373 let p = safe_join(&staging, &e.path)?;
374 match e.kind {
375 FsEntryKind::Dir => {}
376 FsEntryKind::File => {
377 let blob = e.blob.as_ref().ok_or_else(|| {
378 pf_core::Error::Integrity(format!("file entry {} missing blob", e.path))
379 })?;
380 let bytes = blobs.get(blob)?;
381 if let Some(parent) = p.parent() {
382 std::fs::create_dir_all(parent)?;
383 }
384 std::fs::write(&p, bytes)?;
385 apply_mode(&p, &e.mode)?;
386 }
387 FsEntryKind::Symlink => {
388 let raw_target = e.link_target.as_ref().ok_or_else(|| {
389 pf_core::Error::Integrity(format!(
390 "symlink entry {} missing link_target",
391 e.path
392 ))
393 })?;
394 check_symlink_target(&staging, &p, raw_target)?;
400 if let Some(parent) = p.parent() {
401 std::fs::create_dir_all(parent)?;
402 }
403 #[cfg(unix)]
404 std::os::unix::fs::symlink(raw_target, &p)?;
405 #[cfg(not(unix))]
406 std::fs::write(&p, raw_target.as_bytes())?;
407 }
408 }
409 }
410
411 std::fs::rename(&staging, dst)?;
413 Ok(())
414}
415
/// Reports whether `entry` contains any of the glob characters `*`, `?`, `[`.
fn has_glob_chars(entry: &str) -> bool {
    entry.chars().any(|c| matches!(c, '*' | '?' | '['))
}
435
436fn compile_globs(ignores: &[String]) -> Vec<globset::GlobMatcher> {
443 let mut out = Vec::new();
444 for ign in ignores {
445 if !has_glob_chars(ign) {
446 continue;
447 }
448 match globset::Glob::new(ign) {
449 Ok(g) => out.push(g.compile_matcher()),
450 Err(e) => tracing::warn!("ignore: invalid glob {ign:?}: {e}"),
451 }
452 }
453 out
454}
455
456fn path_matches_any_glob(relative_path: &Path, globs: &[globset::GlobMatcher]) -> bool {
460 if globs.is_empty() {
461 return false;
462 }
463 for g in globs {
464 if g.is_match(relative_path) {
465 return true;
466 }
467 if let Some(name) = relative_path.file_name()
471 && g.is_match(Path::new(name))
472 {
473 return true;
474 }
475 }
476 false
477}
478
/// Returns `true` when any entry of `ignores` occurs in `path` as a run of
/// consecutive, complete path components.
///
/// Each ignore entry is split on `/`; empty segments are dropped and an entry
/// with no segments never matches. Only normal components of `path` take part
/// in the comparison, so `target` matches `a/target/b` but not `a/targeted/b`.
///
/// Components that are not valid UTF-8 stay in place and never equal a
/// segment. Dropping them would make their neighbours look adjacent and
/// produce false matches.
fn path_matches_any_ignore(path: &Path, ignores: &[String]) -> bool {
    let comps: Vec<&std::ffi::OsStr> = path
        .components()
        .filter_map(|c| match c {
            std::path::Component::Normal(s) => Some(s),
            _ => None,
        })
        .collect();

    ignores.iter().any(|ign| {
        let needles: Vec<&str> = ign.split('/').filter(|s| !s.is_empty()).collect();
        // `windows(0)` panics, so an empty entry is rejected up front.
        !needles.is_empty()
            && comps.windows(needles.len()).any(|window| {
                window
                    .iter()
                    .zip(&needles)
                    .all(|(have, want)| have.to_str() == Some(*want))
            })
    })
}
503
504fn safe_join(root: &Path, relative: &str) -> pf_core::Result<PathBuf> {
510 let candidate = Path::new(relative);
511 if candidate.is_absolute() {
512 return Err(pf_core::Error::Integrity(format!(
513 "fs.tree entry has absolute path {relative:?} — refusing"
514 )));
515 }
516 for comp in candidate.components() {
519 match comp {
520 std::path::Component::ParentDir => {
521 return Err(pf_core::Error::Integrity(format!(
522 "fs.tree entry path {relative:?} contains `..` — refusing"
523 )));
524 }
525 std::path::Component::RootDir | std::path::Component::Prefix(_) => {
526 return Err(pf_core::Error::Integrity(format!(
527 "fs.tree entry path {relative:?} has root/prefix — refusing"
528 )));
529 }
530 std::path::Component::CurDir | std::path::Component::Normal(_) => {}
531 }
532 }
533 Ok(root.join(candidate))
534}
535
536fn check_symlink_target(root: &Path, link_path: &Path, target: &str) -> pf_core::Result<()> {
542 let target_path = Path::new(target);
543 if target_path.is_absolute() {
544 return Err(pf_core::Error::Integrity(format!(
545 "symlink target {target:?} is absolute — refusing"
546 )));
547 }
548 let link_depth = link_path
552 .strip_prefix(root)
553 .ok()
554 .map_or(0, |p| p.components().count().saturating_sub(1));
555 let mut depth = isize::try_from(link_depth).unwrap_or(isize::MAX);
556 for comp in target_path.components() {
557 match comp {
558 std::path::Component::ParentDir => depth -= 1,
559 std::path::Component::Normal(_) => depth += 1,
560 std::path::Component::CurDir => {}
561 std::path::Component::RootDir | std::path::Component::Prefix(_) => {
562 return Err(pf_core::Error::Integrity(format!(
563 "symlink target {target:?} has root/prefix — refusing"
564 )));
565 }
566 }
567 if depth < 0 {
568 return Err(pf_core::Error::Integrity(format!(
569 "symlink target {target:?} escapes restore root — refusing"
570 )));
571 }
572 }
573 Ok(())
574}
575
576#[cfg(unix)]
581fn apply_mode(path: &Path, mode: &str) -> pf_core::Result<()> {
582 use std::os::unix::fs::PermissionsExt as _;
583 let raw = u32::from_str_radix(mode, 8).unwrap_or(0o644);
584 let perm = std::fs::Permissions::from_mode(raw & 0o7777);
585 let meta = std::fs::symlink_metadata(path)?;
588 if meta.file_type().is_symlink() {
589 return Ok(());
590 }
591 std::fs::set_permissions(path, perm)?;
592 Ok(())
593}
594
595#[cfg(not(unix))]
596fn apply_mode(_path: &Path, _mode: &str) -> pf_core::Result<()> {
597 Ok(())
598}
599
/// Clones `src` into a fresh directory below the system temp dir by running
/// `cp -c -R`, and returns the clone's path.
///
/// # Errors
///
/// Fails when `cp` cannot be spawned or exits unsuccessfully. In the latter
/// case whatever `cp` had already written is removed again (best effort) so
/// no partial clone is left in the temp directory.
#[cfg(target_os = "macos")]
fn apfs_clone(src: &Path) -> std::io::Result<PathBuf> {
    use std::process::Command;

    // The process id plus a nanosecond timestamp give the clone its own name.
    let dst = std::env::temp_dir().join(format!(
        "pf-apfs-clone.{}.{}",
        std::process::id(),
        chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
    ));
    let status = Command::new("cp")
        .args(["-c", "-R"])
        .arg(src)
        .arg(&dst)
        .status()?;
    if !status.success() {
        let _ = std::fs::remove_dir_all(&dst);
        return Err(std::io::Error::other(format!(
            "cp -c -R exit status: {status:?}"
        )));
    }
    Ok(dst)
}

/// Cloning is not available off macOS; always returns an error.
#[cfg(not(target_os = "macos"))]
fn apfs_clone(_src: &Path) -> std::io::Result<PathBuf> {
    Err(std::io::Error::other("APFS clone only available on macOS"))
}
627
/// Renders the permission bits of `meta` (masked with `0o7777`) as an octal
/// string zero-padded to at least four digits.
#[cfg(unix)]
fn unix_mode_string(meta: &std::fs::Metadata) -> String {
    use std::os::unix::fs::PermissionsExt as _;

    let permission_bits = meta.permissions().mode() & 0o7777;
    format!("{permission_bits:04o}")
}

/// Non-Unix stand-in: `0444` for read-only objects, `0644` otherwise.
#[cfg(not(unix))]
fn unix_mode_string(meta: &std::fs::Metadata) -> String {
    let text = if meta.permissions().readonly() {
        "0444"
    } else {
        "0644"
    };
    text.into()
}
643
#[cfg(test)]
mod tests {
    use super::*;
    use pf_core::cas::MemBlobStore;
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Writes `contents` to `dir/rel`, creating missing parent directories.
    fn write(dir: &Path, rel: &str, contents: &[u8]) {
        let target = dir.join(rel);
        if let Some(parent) = target.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(&target, contents).unwrap();
    }

    /// Fresh in-memory blob store behind the trait object the API expects.
    fn mem_store() -> Arc<dyn BlobStore> {
        Arc::new(MemBlobStore::new())
    }

    /// Runs `capture` and loads the manifest it produced.
    fn capture_tree(capture: &WalkFsCapture, blobs: &Arc<dyn BlobStore>) -> FsTree {
        let cid = capture.capture(blobs).unwrap();
        serde_json::from_slice(&blobs.get(&cid).unwrap()).unwrap()
    }

    /// All entry paths of `tree`, in manifest order.
    fn paths_of(tree: &FsTree) -> Vec<&str> {
        tree.entries.iter().map(|e| e.path.as_str()).collect()
    }

    /// Stores a one-entry manifest, tries to restore it into `<tmp>/dst` and
    /// returns the temp dir together with the error the restore must produce.
    fn restore_single_entry(
        blobs: &Arc<dyn BlobStore>,
        entry: FsTreeEntry,
    ) -> (TempDir, pf_core::Error) {
        let tree = FsTree {
            kind: "fs.tree.v1".into(),
            entries: vec![entry],
        };
        let tree_cid = blobs.put(&serde_json::to_vec(&tree).unwrap()).unwrap();
        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("dst");
        let err = restore_tree(blobs, &tree_cid, &dst).unwrap_err();
        (restore_root, err)
    }

    #[test]
    fn round_trip_small_tree() {
        let src = TempDir::new().unwrap();
        write(src.path(), "a.txt", b"hello");
        write(src.path(), "sub/b.txt", b"world");
        write(src.path(), "sub/c.bin", &[0xABu8; 8 * 1024]);

        let blobs = mem_store();
        let tree_cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("restored");
        restore_tree(&blobs, &tree_cid, &dst).unwrap();

        assert_eq!(std::fs::read(dst.join("a.txt")).unwrap(), b"hello");
        assert_eq!(std::fs::read(dst.join("sub/b.txt")).unwrap(), b"world");
        assert_eq!(
            std::fs::read(dst.join("sub/c.bin")).unwrap().len(),
            8 * 1024
        );
    }

    #[test]
    fn capture_is_deterministic() {
        let src = TempDir::new().unwrap();
        write(src.path(), "a.txt", b"hello");
        write(src.path(), "b.txt", b"world");
        let blobs = mem_store();
        let cid1 = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        let cid2 = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();
        assert_eq!(
            cid1, cid2,
            "capture of identical tree must be byte-identical"
        );
    }

    #[test]
    fn ignored_paths_are_skipped() {
        let src = TempDir::new().unwrap();
        write(src.path(), "kept.txt", b"keep");
        write(src.path(), "node_modules/dep/index.js", b"skip");
        let blobs = mem_store();
        let tree = capture_tree(&WalkFsCapture::new(src.path()), &blobs);
        assert!(tree.entries.iter().any(|e| e.path == "kept.txt"));
        assert!(
            !tree
                .entries
                .iter()
                .any(|e| e.path.starts_with("node_modules"))
        );
    }

    #[cfg(unix)]
    #[test]
    fn symlinks_are_captured_as_symlinks() {
        let src = TempDir::new().unwrap();
        write(src.path(), "real.txt", b"data");
        std::os::unix::fs::symlink("real.txt", src.path().join("link.txt")).unwrap();
        let blobs = mem_store();
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("r");
        restore_tree(&blobs, &cid, &dst).unwrap();

        let restored_link = dst.join("link.txt");
        let meta = std::fs::symlink_metadata(&restored_link).unwrap();
        assert!(meta.file_type().is_symlink());
        assert_eq!(
            std::fs::read_link(&restored_link)
                .unwrap()
                .to_str()
                .unwrap(),
            "real.txt"
        );
    }

    #[test]
    fn malicious_relative_path_traversal_is_refused() {
        let blobs = mem_store();
        let payload = b"PWNED";
        let blob = blobs.put(payload).unwrap();
        let (restore_root, err) = restore_single_entry(
            &blobs,
            FsTreeEntry {
                path: "../../escape.txt".into(),
                mode: "100644".into(),
                size: payload.len() as u64,
                kind: FsEntryKind::File,
                blob: Some(blob),
                link_target: None,
            },
        );
        let text = err.to_string();
        assert!(
            text.contains("`..`") || text.contains("refusing"),
            "expected path-traversal refusal, got {err}"
        );
        assert!(!restore_root.path().join("escape.txt").exists());
    }

    #[test]
    fn malicious_absolute_path_is_refused() {
        let blobs = mem_store();
        let blob = blobs.put(b"x").unwrap();
        let (_restore_root, err) = restore_single_entry(
            &blobs,
            FsTreeEntry {
                path: "/tmp/should-not-write".into(),
                mode: "100644".into(),
                size: 1,
                kind: FsEntryKind::File,
                blob: Some(blob),
                link_target: None,
            },
        );
        let text = err.to_string();
        assert!(
            text.contains("absolute") || text.contains("refusing"),
            "expected absolute-path refusal, got {err}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn malicious_symlink_escape_is_refused() {
        let blobs = mem_store();
        let target_str = "../../escape";
        let blob = blobs.put(target_str.as_bytes()).unwrap();
        let (_restore_root, err) = restore_single_entry(
            &blobs,
            FsTreeEntry {
                path: "evil.lnk".into(),
                mode: "120777".into(),
                size: target_str.len() as u64,
                kind: FsEntryKind::Symlink,
                blob: Some(blob),
                link_target: Some(target_str.to_owned()),
            },
        );
        let text = err.to_string();
        assert!(
            text.contains("escape") || text.contains("refusing"),
            "expected symlink-escape refusal, got {err}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn executable_mode_is_restored() {
        use std::os::unix::fs::PermissionsExt as _;
        let src = TempDir::new().unwrap();
        write(src.path(), "script.sh", b"#!/bin/sh\necho hi\n");
        let scr = src.path().join("script.sh");
        std::fs::set_permissions(&scr, std::fs::Permissions::from_mode(0o755)).unwrap();
        let blobs = mem_store();
        let cid = WalkFsCapture::new(src.path()).capture(&blobs).unwrap();

        let restore_root = TempDir::new().unwrap();
        let dst = restore_root.path().join("r");
        restore_tree(&blobs, &cid, &dst).unwrap();
        let meta = std::fs::metadata(dst.join("script.sh")).unwrap();
        assert_eq!(
            meta.permissions().mode() & 0o7777,
            0o755,
            "executable bit must survive snapshot+restore"
        );
    }

    #[test]
    fn ignore_matches_segments_not_substrings() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/targeted/keep.txt", b"keep");
        write(src.path(), "target/should-skip.txt", b"skip");
        let blobs = mem_store();
        let tree = capture_tree(&WalkFsCapture::new(src.path()), &blobs);
        let paths = paths_of(&tree);
        assert!(
            paths.contains(&"src/targeted/keep.txt"),
            "src/targeted/keep.txt must NOT be filtered (was: {paths:?})"
        );
        assert!(
            !paths.iter().any(|p| p.starts_with("target/")),
            "target/ subtree must be filtered (was: {paths:?})"
        );
    }

    #[test]
    fn default_ignores_skip_python_cache_dirs() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"print('hi')\n");
        write(
            src.path(),
            "src/__pycache__/main.cpython-313.pyc",
            b"\x03\xf3\r\n",
        );
        write(src.path(), ".pytest_cache/CACHEDIR.TAG", b"Signature: ...");
        write(src.path(), ".mypy_cache/3.13/CACHEDIR.TAG", b"...");
        write(src.path(), ".ruff_cache/0.6.0/foo", b"x");
        write(src.path(), ".venv/bin/python", b"#!/...\n");
        let blobs = mem_store();
        let tree = capture_tree(&WalkFsCapture::new(src.path()), &blobs);
        let paths = paths_of(&tree);

        assert!(
            paths.contains(&"src/main.py"),
            "real source file must survive: {paths:?}"
        );
        for cache_pat in [
            "__pycache__",
            ".pytest_cache",
            ".mypy_cache",
            ".ruff_cache",
            ".venv",
        ] {
            assert!(
                !paths.iter().any(|p| p.contains(cache_pat)),
                "{cache_pat} must be filtered by default; got: {paths:?}"
            );
        }
    }

    #[test]
    #[allow(clippy::case_sensitive_file_extension_comparisons)]
    fn glob_patterns_match_files_anywhere_in_tree() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"keep");
        write(src.path(), "src/legacy.pyc", b"skip-by-glob");
        write(src.path(), "build/output.pyc", b"skip-by-glob");
        let blobs = mem_store();
        let tree = capture_tree(&WalkFsCapture::new(src.path()), &blobs);
        let paths = paths_of(&tree);
        assert!(
            paths.contains(&"src/main.py"),
            "non-glob source must survive: {paths:?}"
        );
        assert!(
            !paths.iter().any(|p| p.ends_with(".pyc")),
            "*.pyc glob must filter every .pyc anywhere: {paths:?}"
        );
    }

    #[test]
    fn opt_out_of_default_ignores_captures_caches() {
        let src = TempDir::new().unwrap();
        write(src.path(), "__pycache__/foo.pyc", b"x");
        write(src.path(), "src/main.py", b"hi");
        let blobs = mem_store();
        let tree = capture_tree(
            &WalkFsCapture::new_without_default_ignores(src.path()),
            &blobs,
        );
        let paths = paths_of(&tree);
        assert!(
            paths.iter().any(|p| p.contains("__pycache__")),
            "without default ignores, __pycache__ must round-trip: {paths:?}"
        );
    }

    #[test]
    #[allow(clippy::case_sensitive_file_extension_comparisons)]
    fn ignore_from_file_applies_each_line() {
        let src = TempDir::new().unwrap();
        write(src.path(), "src/main.py", b"keep");
        write(src.path(), "secrets/api.key", b"private");
        write(src.path(), "logs/today.log", b"verbose");
        write(
            src.path(),
            ".pfignore",
            b"# project ignores\nsecrets\n*.log\n",
        );
        let blobs = mem_store();
        let capture = WalkFsCapture::new(src.path())
            .ignore_from(src.path().join(".pfignore"))
            .unwrap();
        let tree = capture_tree(&capture, &blobs);
        let paths = paths_of(&tree);
        assert!(paths.contains(&"src/main.py"));
        assert!(
            !paths.iter().any(|p| p.starts_with("secrets/")),
            "secrets/ should be filtered by .pfignore: {paths:?}"
        );
        assert!(
            !paths.iter().any(|p| p.ends_with(".log")),
            "*.log glob from .pfignore should filter logs: {paths:?}"
        );
    }
}