1use std::{
5 collections::BTreeSet,
6 fs,
7 num::NonZeroUsize,
8 path::{Path, PathBuf},
9 sync::atomic::{AtomicBool, Ordering},
10 thread,
11 time::Instant,
12};
13
14use objects::{
15 fs_atomic::enrich_fs_error,
16 object::{ChangeId, ContentHash, EntryType, Tree},
17 store::ObjectStore,
18};
19use tracing::{debug, instrument};
20
21#[cfg(unix)]
24use super::repository_worktree_apply::is_directory_not_empty;
25use super::{HeddleError, Repository, Result};
26use crate::{
27 worktree_index::IndexEntry,
28 worktree_walk::{build_cached_entry, cache_key, validate_symlink_target},
29};
30
/// Bookkeeping shared by every write in one materialization batch.
///
/// Every field is an atomic that is only touched with `Ordering::Relaxed`,
/// which lets a single context be handed to several workers by reference.
struct MaterializationContext {
    /// Starts `true`; cleared by [`Self::disable_reflinks`].
    reflink_supported: AtomicBool,
    /// Incremented once per call to [`Self::record_reflink`].
    reflink_count: std::sync::atomic::AtomicUsize,
    /// Incremented once per call to [`Self::record_copy`].
    copy_count: std::sync::atomic::AtomicUsize,
}

impl MaterializationContext {
    /// Builds a context with reflinks enabled and both counters at zero.
    fn new() -> Self {
        Self {
            reflink_supported: AtomicBool::new(true),
            // `AtomicUsize::default()` is a zeroed counter.
            reflink_count: Default::default(),
            copy_count: Default::default(),
        }
    }

    /// Reports whether reflink attempts are still allowed for this batch.
    fn reflinks_enabled(&self) -> bool {
        let enabled = self.reflink_supported.load(Ordering::Relaxed);
        enabled
    }

    /// Counts one blob that was materialized through a reflink.
    fn record_reflink(&self) {
        let _previous = self.reflink_count.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one blob that was materialized by writing its bytes.
    fn record_copy(&self) {
        let _previous = self.copy_count.fetch_add(1, Ordering::Relaxed);
    }

    /// Turns reflink attempts off for the remainder of the batch.
    fn disable_reflinks(&self) {
        self.reflink_supported.store(false, Ordering::Relaxed);
    }
}
92
/// Batches with fewer write operations than this are always handled by a
/// single worker.
const MATERIALIZE_PARALLEL_THRESHOLD: usize = 32;
/// Name of the environment variable consulted for a requested worker count.
const MATERIALIZE_THREADS_ENV: &str = "HEDDLE_MATERIALIZE_THREADS";
95
96struct MaterializationPlan {
97 validation_root: PathBuf,
98 directories: Vec<PathBuf>,
99 directory_contexts: Vec<MaterializedDirectoryContext>,
100 leaves: Vec<WorktreeWriteOp>,
101 file_count: usize,
102 symlink_count: usize,
103}
104
105#[derive(Debug)]
106pub(crate) struct MaterializedTree {
107 pub(crate) file_entries: Vec<SeededWorktreeEntry>,
108 pub(crate) directory_contexts: Vec<MaterializedDirectoryContext>,
109}
110
111#[derive(Debug)]
112pub(crate) struct SeededWorktreeEntry {
113 pub(crate) key: String,
114 pub(crate) entry: IndexEntry,
115}
116
117#[derive(Debug)]
118pub(crate) struct MaterializedDirectoryContext {
119 pub(crate) key: String,
120 pub(crate) path: PathBuf,
121 pub(crate) child_names: Vec<String>,
122 pub(crate) tree_hash: ContentHash,
123}
124
125#[derive(Clone, Debug)]
126pub(crate) enum WorktreeWriteOp {
127 Blob {
128 path: PathBuf,
129 hash: ContentHash,
130 executable: bool,
131 },
132 Symlink {
133 path: PathBuf,
134 hash: ContentHash,
135 validation_root: PathBuf,
136 },
137}
138
139impl WorktreeWriteOp {
140 pub(crate) fn path(&self) -> &Path {
141 match self {
142 Self::Blob { path, .. } | Self::Symlink { path, .. } => path,
143 }
144 }
145
146 pub(crate) fn hash(&self) -> ContentHash {
147 match self {
148 Self::Blob { hash, .. } | Self::Symlink { hash, .. } => *hash,
149 }
150 }
151
152 pub(crate) fn executable(&self) -> bool {
153 match self {
154 Self::Blob { executable, .. } => *executable,
155 Self::Symlink { .. } => false,
156 }
157 }
158
159 pub(crate) fn index_kind(&self) -> crate::worktree_index::IndexEntryKind {
160 match self {
161 Self::Blob { .. } => crate::worktree_index::IndexEntryKind::File,
162 Self::Symlink { .. } => crate::worktree_index::IndexEntryKind::Symlink,
163 }
164 }
165}
166
/// Tally of what a warm pass over the canonical store did.
#[derive(Debug, Default, Clone, Copy)]
pub struct WarmCanonicalStoreStats {
    /// Blobs for which promotion reported `Ok(true)`.
    pub promoted: usize,
    /// Blobs for which promotion reported `Ok(false)`.
    pub already_loose: usize,
    /// Blobs whose promotion returned an error.
    pub errors: usize,
}

impl WarmCanonicalStoreStats {
    /// Number of blobs visited: the sum of all three counters.
    pub fn total(&self) -> usize {
        // Exhaustive destructuring: adding a field forces this sum to be revisited.
        let Self {
            promoted,
            already_loose,
            errors,
        } = *self;
        promoted + already_loose + errors
    }
}
202
203impl Repository {
204 #[instrument(skip(self), fields(state_id = %state_id))]
216 pub fn warm_canonical_store_for_state(
217 &self,
218 state_id: &ChangeId,
219 ) -> Result<WarmCanonicalStoreStats> {
220 self.warm_canonical_store_for_states(std::slice::from_ref(state_id))
221 }
222
223 #[instrument(skip(self, state_ids), fields(state_count = state_ids.len()))]
229 pub fn warm_canonical_store_for_states(
230 &self,
231 state_ids: &[ChangeId],
232 ) -> Result<WarmCanonicalStoreStats> {
233 let mut blob_hashes = BTreeSet::new();
234 for state_id in state_ids {
235 let state = self
236 .store
237 .get_state(state_id)?
238 .ok_or_else(|| HeddleError::NotFound(format!("state {} not in store", state_id)))?;
239 let tree = self.store.get_tree(&state.tree)?.ok_or_else(|| {
240 HeddleError::NotFound(format!("tree {} (for state {})", state.tree, state_id))
241 })?;
242 self.collect_blob_hashes(&tree, &mut blob_hashes)?;
243 }
244
245 let mut stats = WarmCanonicalStoreStats::default();
246 for hash in &blob_hashes {
247 match self.store.promote_to_loose_uncompressed(hash) {
248 Ok(true) => stats.promoted += 1,
249 Ok(false) => stats.already_loose += 1,
250 Err(err) => {
251 debug!(
252 ?err,
253 hash = %hash,
254 "promote_to_loose_uncompressed failed during warm pass"
255 );
256 stats.errors += 1;
257 }
258 }
259 }
260
261 debug!(
262 promoted = stats.promoted,
263 already_loose = stats.already_loose,
264 errors = stats.errors,
265 "Warm canonical store pass complete"
266 );
267
268 Ok(stats)
269 }
270
271 fn collect_blob_hashes(&self, tree: &Tree, out: &mut BTreeSet<ContentHash>) -> Result<()> {
272 for entry in tree.entries() {
273 match entry.entry_type {
284 EntryType::Blob | EntryType::Symlink => {
285 out.insert(entry.hash);
286 }
287 EntryType::Tree => {
288 let subtree = self
289 .store
290 .get_tree(&entry.hash)?
291 .ok_or_else(|| HeddleError::NotFound(format!("tree {}", entry.hash)))?;
292 self.collect_blob_hashes(&subtree, out)?;
293 }
294 }
295 }
296 Ok(())
297 }
298
299 #[instrument(skip(self, tree), fields(dir = %dir.display(), entries = tree.len()))]
310 pub(crate) fn materialize_tree(&self, tree: &Tree, dir: &Path) -> Result<()> {
311 self.materialize_tree_seeded(tree, dir).map(|_| ())
312 }
313
314 pub fn materialize_computed_tree(&self, tree: &Tree, dir: &Path) -> Result<()> {
321 self.materialize_tree(tree, dir)
322 }
323
324 pub(crate) fn materialize_tree_seeded(
325 &self,
326 tree: &Tree,
327 dir: &Path,
328 ) -> Result<MaterializedTree> {
329 let plan_start = Instant::now();
330 let mut plan = MaterializationPlan {
331 validation_root: dir.to_path_buf(),
332 directories: Vec::new(),
333 directory_contexts: Vec::new(),
334 leaves: Vec::new(),
335 file_count: 0,
336 symlink_count: 0,
337 };
338 self.plan_materialization(tree, Path::new(""), dir, &mut plan)?;
339 let plan_duration_ms = plan_start.elapsed().as_millis();
340
341 let execution_start = Instant::now();
342 let requested_threads = requested_materialization_threads();
343 fs::create_dir_all(dir)
344 .map_err(|e| HeddleError::Io(enrich_fs_error(dir, "creating", e)))?;
345 for directory in &plan.directories {
346 fs::create_dir_all(directory)
347 .map_err(|e| HeddleError::Io(enrich_fs_error(directory, "creating", e)))?;
348 }
349
350 let (worker_count, file_entries) = self.materialize_write_ops_seeded(&plan.leaves)?;
351
352 debug!(
353 directories = plan.directories.len(),
354 files = plan.file_count,
355 symlinks = plan.symlink_count,
356 workers = worker_count,
357 requested_workers = requested_threads.map(NonZeroUsize::get),
358 plan_duration_ms,
359 execution_duration_ms = execution_start.elapsed().as_millis(),
360 parallel = worker_count > 1,
361 "Tree materialization complete"
362 );
363
364 Ok(MaterializedTree {
365 file_entries,
366 directory_contexts: plan.directory_contexts,
367 })
368 }
369
370 fn plan_materialization(
371 &self,
372 tree: &Tree,
373 rel_dir: &Path,
374 dir: &Path,
375 plan: &mut MaterializationPlan,
376 ) -> Result<()> {
377 plan.directory_contexts.push(MaterializedDirectoryContext {
378 key: cache_key(rel_dir),
379 path: dir.to_path_buf(),
380 child_names: tree
381 .entries()
382 .iter()
383 .map(|entry| entry.name.clone())
384 .collect(),
385 tree_hash: tree.hash(),
386 });
387
388 for entry in tree.entries() {
389 let path = dir.join(&entry.name);
390 let rel_path = rel_dir.join(&entry.name);
391 let is_symlink = entry.entry_type == EntryType::Symlink
399 || entry.mode == objects::object::FileMode::Symlink;
400 if is_symlink {
401 plan.symlink_count += 1;
402 plan.leaves.push(WorktreeWriteOp::Symlink {
403 path,
404 hash: entry.hash,
405 validation_root: plan.validation_root.clone(),
406 });
407 continue;
408 }
409 match entry.entry_type {
410 EntryType::Blob => {
411 plan.file_count += 1;
412 plan.leaves.push(WorktreeWriteOp::Blob {
413 path,
414 hash: entry.hash,
415 executable: entry.is_executable(),
416 });
417 }
418 EntryType::Tree => {
419 let subtree = self
420 .store
421 .get_tree(&entry.hash)?
422 .ok_or_else(|| HeddleError::NotFound(format!("tree {}", entry.hash)))?;
423 plan.directories.push(path.clone());
424 self.plan_materialization(&subtree, &rel_path, &path, plan)?;
425 }
426 EntryType::Symlink => {
427 unreachable!(
429 "EntryType::Symlink should have been routed by the is_symlink guard"
430 );
431 }
432 }
433 }
434
435 Ok(())
436 }
437
438 pub(crate) fn materialize_write_ops(&self, writes: &[WorktreeWriteOp]) -> Result<usize> {
439 self.materialize_write_ops_seeded(writes)
440 .map(|(worker_count, _)| worker_count)
441 }
442
443 pub(crate) fn materialize_write_ops_seeded(
444 &self,
445 writes: &[WorktreeWriteOp],
446 ) -> Result<(usize, Vec<SeededWorktreeEntry>)> {
447 prepare_parent_directories(writes)?;
448
449 let requested_threads = requested_materialization_threads();
450 let worker_count = materialization_worker_count(writes.len(), requested_threads);
451
452 let context = MaterializationContext::new();
460
461 let result = if worker_count <= 1 {
462 let mut seeded = Vec::with_capacity(writes.len());
463 for write in writes {
464 seeded.push(self.materialize_write_op(write, &context)?);
465 }
466 Ok((worker_count, seeded))
467 } else {
468 let chunk_size = writes.len().div_ceil(worker_count);
469 let seeded = thread::scope(|scope| -> Result<Vec<SeededWorktreeEntry>> {
470 let mut workers = Vec::new();
471 let context = &context;
472 for chunk in writes.chunks(chunk_size) {
473 workers.push(scope.spawn(move || -> Result<Vec<SeededWorktreeEntry>> {
474 let mut seeded = Vec::with_capacity(chunk.len());
475 for write in chunk {
476 seeded.push(self.materialize_write_op(write, context)?);
477 }
478 Ok(seeded)
479 }));
480 }
481
482 let mut seeded = Vec::with_capacity(writes.len());
483 for worker in workers {
484 seeded.extend(worker.join().map_err(|_| {
485 HeddleError::Config("materialization worker panicked".to_string())
486 })??);
487 }
488
489 Ok(seeded)
490 })?;
491
492 Ok((worker_count, seeded))
493 };
494
495 let reflinks = context.reflink_count.load(Ordering::Relaxed);
496 let copies = context.copy_count.load(Ordering::Relaxed);
497 if reflinks + copies > 0 {
498 debug!(
499 reflinks,
500 copies,
501 reflinks_enabled = context.reflinks_enabled(),
502 "Materialized blobs"
503 );
504 }
505
506 result
507 }
508
509 fn materialize_write_op(
510 &self,
511 write: &WorktreeWriteOp,
512 context: &MaterializationContext,
513 ) -> Result<SeededWorktreeEntry> {
514 match write {
515 WorktreeWriteOp::Blob {
516 path,
517 hash,
518 executable,
519 } => {
520 self.materialize_blob(path, hash, *executable, context)?;
521 }
522 WorktreeWriteOp::Symlink {
523 path,
524 hash,
525 validation_root,
526 } => {
527 let blob = self
528 .store
529 .get_blob(hash)?
530 .ok_or_else(|| HeddleError::NotFound(format!("blob {}", hash)))?;
531 #[cfg(unix)]
532 {
533 let target = std::str::from_utf8(blob.content()).map_err(|_| {
534 HeddleError::InvalidObject("invalid symlink target".to_string())
535 })?;
536 let target_path = Path::new(target);
537 let symlink_dir = path.parent().unwrap_or(validation_root);
538 if !validate_symlink_target(validation_root, symlink_dir, target_path) {
539 return Err(HeddleError::InvalidSymlinkTarget(target_path.to_path_buf()));
540 }
541 remove_materialized_leaf(path)?;
542 std::os::unix::fs::symlink(target, path)?;
543 }
544 #[cfg(not(unix))]
551 {
552 let _ = (blob, path, validation_root);
553 }
554 }
555 }
556
557 let metadata = fs::symlink_metadata(write.path())?;
558 let entry = build_cached_entry(
559 write.hash(),
560 &metadata,
561 write.executable(),
562 write.index_kind(),
563 )
564 .ok_or_else(|| {
565 HeddleError::Config(format!(
566 "seed materialized worktree entry for {}",
567 write.path().display()
568 ))
569 })?;
570
571 Ok(SeededWorktreeEntry {
572 key: cache_key(
573 write
574 .path()
575 .strip_prefix(self.root())
576 .unwrap_or(write.path()),
577 ),
578 entry,
579 })
580 }
581
582 fn materialize_blob(
608 &self,
609 dest: &Path,
610 hash: &ContentHash,
611 executable: bool,
612 context: &MaterializationContext,
613 ) -> Result<()> {
614 if let Some(stub) = self
624 .redaction_stub_for_blob(hash)
625 .map_err(|err| HeddleError::Config(format!("redaction lookup failed: {err}")))?
626 {
627 let _ = fs::remove_file(dest);
628 fs::write(dest, stub.as_bytes())?;
629 set_file_mode(dest, false)?;
633 context.record_copy();
636 let _ = executable;
637 return Ok(());
638 }
639
640 if context.reflinks_enabled() {
641 if let Some(source) = self.store.loose_blob_path(hash)
643 && self.try_clone(&source, dest, executable, context)?
644 {
645 return Ok(());
646 }
647 match self.store.promote_to_loose_uncompressed(hash) {
659 Ok(_) => {
660 if let Some(source) = self.store.loose_blob_path(hash)
661 && self.try_clone(&source, dest, executable, context)?
662 {
663 return Ok(());
664 }
665 }
666 Err(err) => {
667 debug!(
668 ?err,
669 hash = %hash,
670 "promote_to_loose_uncompressed failed; falling back to fs::write"
671 );
672 }
673 }
674 }
675
676 let blob = self
677 .store
678 .get_blob(hash)?
679 .ok_or_else(|| HeddleError::NotFound(format!("blob {}", hash)))?;
680 let _ = fs::remove_file(dest);
685 fs::write(dest, blob.content())?;
686 set_file_mode(dest, executable)?;
687 context.record_copy();
688 Ok(())
689 }
690
691 fn try_clone(
708 &self,
709 source: &Path,
710 dest: &Path,
711 executable: bool,
712 context: &MaterializationContext,
713 ) -> Result<bool> {
714 let _ = fs::remove_file(dest);
718 if !source.exists() {
732 debug!(
733 source = %source.display(),
734 dest = %dest.display(),
735 "loose reflink source missing before clone; falling back to bytes-write for this blob"
736 );
737 return Ok(false);
738 }
739 use objects::fs_clone::ReflinkOutcome;
740 match objects::fs_clone::try_reflink(source, dest) {
741 Ok(ReflinkOutcome::Cloned) => {
742 set_file_mode(dest, executable)?;
743 context.record_reflink();
744 Ok(true)
745 }
746 Ok(ReflinkOutcome::Unsupported) => {
747 debug!(
752 source = %source.display(),
753 dest = %dest.display(),
754 "reflink not supported on this filesystem; switching batch to fs::write fallback"
755 );
756 context.disable_reflinks();
757 Ok(false)
758 }
759 Ok(ReflinkOutcome::SourceVanished) => {
760 debug!(
769 source = %source.display(),
770 dest = %dest.display(),
771 "loose reflink source vanished before clone; falling back to bytes-write for this blob (reflinks stay enabled batch-wide)"
772 );
773 Ok(false)
774 }
775 Err(err) => {
776 debug!(
777 ?err,
778 source = %source.display(),
779 dest = %dest.display(),
780 "reflink failed with I/O error"
781 );
782 match classify_clone_failure(source, dest, &err) {
783 None => {
787 debug!(
788 source = %source.display(),
789 dest = %dest.display(),
790 "loose reflink source vanished between pre-check and clone syscall; falling back to bytes-write for this blob"
791 );
792 Ok(false)
793 }
794 Some((offender, action)) => {
796 Err(HeddleError::Io(enrich_fs_error(offender, action, err)))
797 }
798 }
799 }
800 }
801 }
802}
803
/// Decides how a failed clone should be reported.
///
/// Returns `None` when the error is `NotFound` and `source` no longer exists,
/// signalling that the caller should fall back instead of failing. Otherwise
/// returns the path to blame and the action label for the error message:
/// `dest` with `"reflinking into"` if `source` can still be opened, else
/// `source` with `"reflinking"`.
fn classify_clone_failure<'a>(
    source: &'a Path,
    dest: &'a Path,
    err: &std::io::Error,
) -> Option<(&'a Path, &'static str)> {
    // `exists()` is consulted only for NotFound errors, thanks to short-circuiting.
    let source_vanished = err.kind() == std::io::ErrorKind::NotFound && !source.exists();
    if source_vanished {
        return None;
    }

    let blame = match fs::File::open(source) {
        Ok(_) => (dest, "reflinking into"),
        Err(_) => (source, "reflinking"),
    };
    Some(blame)
}
835
836fn prepare_parent_directories(writes: &[WorktreeWriteOp]) -> Result<()> {
837 let mut parents = BTreeSet::new();
838 for write in writes {
839 if let Some(parent) = write.path().parent() {
840 parents.insert(parent.to_path_buf());
841 }
842 }
843
844 for parent in parents {
845 fs::create_dir_all(&parent)
846 .map_err(|e| HeddleError::Io(enrich_fs_error(&parent, "creating", e)))?;
847 }
848
849 Ok(())
850}
851
852#[cfg(unix)]
869fn remove_materialized_leaf(path: &Path) -> Result<()> {
870 match fs::symlink_metadata(path) {
871 Ok(metadata) => {
872 let file_type = metadata.file_type();
873 if file_type.is_symlink() || file_type.is_file() {
874 fs::remove_file(path)
875 .map_err(|e| HeddleError::Io(enrich_fs_error(path, "removing", e)))?;
876 } else if file_type.is_dir() {
877 match fs::remove_dir(path) {
878 Ok(()) => {}
879 Err(error) if is_directory_not_empty(&error) => {}
880 Err(error) => {
881 return Err(HeddleError::Io(enrich_fs_error(path, "removing", error)));
882 }
883 }
884 }
885 Ok(())
886 }
887 Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(()),
888 Err(error) => Err(HeddleError::Io(enrich_fs_error(path, "inspecting", error))),
889 }
890}
891
892fn set_file_mode(path: &Path, executable: bool) -> Result<()> {
893 #[cfg(unix)]
894 {
895 use std::os::unix::fs::PermissionsExt;
896
897 let mode = if executable { 0o755 } else { 0o644 };
903 fs::set_permissions(path, fs::Permissions::from_mode(mode))?;
904 }
905 #[cfg(not(unix))]
906 {
907 let _ = (path, executable);
908 }
909 Ok(())
910}
911
912fn materialization_worker_count(
913 operation_count: usize,
914 requested_threads: Option<NonZeroUsize>,
915) -> usize {
916 if operation_count < MATERIALIZE_PARALLEL_THRESHOLD {
917 return 1;
918 }
919
920 let available = requested_threads.unwrap_or_else(default_materialization_threads);
921 available.get().min(operation_count.max(1))
922}
923
/// Thread count used when none was requested: the parallelism reported by the
/// standard library, or one when that query fails.
fn default_materialization_threads() -> NonZeroUsize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism,
        Err(_) => NonZeroUsize::MIN,
    }
}
927
928fn requested_materialization_threads() -> Option<NonZeroUsize> {
929 let raw = std::env::var(MATERIALIZE_THREADS_ENV).ok()?;
930 raw.trim().parse::<usize>().ok().and_then(NonZeroUsize::new)
931}
932
933#[cfg(test)]
934mod tests {
935 use std::{num::NonZeroUsize, path::PathBuf};
936
937 use objects::{fs_clone::filesystem_supports_reflink, object::Blob, store::ObjectStore};
938 use tempfile::TempDir;
939
940 use super::{
941 MaterializationContext, Repository, WorktreeWriteOp, classify_clone_failure,
942 materialization_worker_count, remove_materialized_leaf,
943 };
944
945 #[test]
949 fn classify_clone_failure_vanished_source_falls_back() {
950 let temp = TempDir::new().unwrap();
951 let source = temp.path().join("gone.blob");
952 let dest = temp.path().join("checkout/file");
953 assert!(!source.exists());
954
955 let enoent = std::io::Error::from(std::io::ErrorKind::NotFound);
956 assert!(
957 classify_clone_failure(&source, &dest, &enoent).is_none(),
958 "a vanished-source ENOENT must signal the bytes-write fallback"
959 );
960 }
961
962 #[test]
966 fn classify_clone_failure_present_source_blames_dest() {
967 let temp = TempDir::new().unwrap();
968 let source = temp.path().join("present.blob");
969 std::fs::write(&source, b"bytes").unwrap();
970 let dest = temp.path().join("readonly-checkout/file");
971
972 let erofs = std::io::Error::from(std::io::ErrorKind::PermissionDenied);
974 let attributed = classify_clone_failure(&source, &dest, &erofs);
975 assert_eq!(
976 attributed,
977 Some((dest.as_path(), "reflinking into")),
978 "a failure with the source still readable must be attributed to dest"
979 );
980 }
981
982 #[test]
986 fn classify_clone_failure_unreadable_source_blames_source() {
987 let temp = TempDir::new().unwrap();
988 let source = temp.path().join("missing.blob"); let dest = temp.path().join("file");
990
991 let other = std::io::Error::from(std::io::ErrorKind::PermissionDenied);
994 assert_eq!(
995 classify_clone_failure(&source, &dest, &other),
996 Some((source.as_path(), "reflinking")),
997 "an unreadable source must be attributed to the source path"
998 );
999 }
1000
1001 #[test]
1009 fn try_clone_vanished_source_keeps_batch_reflinks_enabled() {
1010 let temp = TempDir::new().unwrap();
1011 let repo = Repository::init_default(temp.path()).unwrap();
1012 let context = MaterializationContext::new();
1013 assert!(context.reflinks_enabled(), "context starts optimistic");
1014
1015 let missing = temp.path().join("pruned.blob");
1016 let dest = temp.path().join("wt/out.txt");
1017 assert!(!missing.exists());
1018
1019 let cloned = repo
1020 .try_clone(&missing, &dest, false, &context)
1021 .expect("a vanished source must fall back, not error");
1022 assert!(!cloned, "a vanished source cannot have been reflinked");
1023 assert!(
1024 context.reflinks_enabled(),
1025 "a vanished source must NOT disable reflinks for the rest of the batch"
1026 );
1027 }
1028
1029 #[test]
1039 fn remove_materialized_leaf_tolerates_directory_not_empty() {
1040 let temp = TempDir::new().unwrap();
1041 let dir = temp.path().join("web");
1042 std::fs::create_dir_all(dir.join("node_modules/lodash")).unwrap();
1043 std::fs::write(dir.join("node_modules/lodash/index.js"), "ignored").unwrap();
1044
1045 remove_materialized_leaf(&dir).expect("must tolerate ENOTEMPTY");
1048 assert!(
1049 dir.join("node_modules/lodash/index.js").exists(),
1050 "ignored content must survive the tolerated removal"
1051 );
1052 }
1053
1054 #[test]
1057 fn remove_materialized_leaf_removes_empty_directory() {
1058 let temp = TempDir::new().unwrap();
1059 let dir = temp.path().join("emptydir");
1060 std::fs::create_dir(&dir).unwrap();
1061
1062 remove_materialized_leaf(&dir).expect("must remove empty dir");
1063 assert!(!dir.exists(), "empty directory must be removed");
1064 }
1065
1066 #[test]
1068 fn remove_materialized_leaf_is_noop_for_missing_path() {
1069 let temp = TempDir::new().unwrap();
1070 remove_materialized_leaf(&temp.path().join("does-not-exist"))
1071 .expect("missing path must be a no-op");
1072 }
1073
1074 #[test]
1077 fn remove_materialized_leaf_removes_regular_file() {
1078 let temp = TempDir::new().unwrap();
1079 let file = temp.path().join("a.txt");
1080 std::fs::write(&file, "content").unwrap();
1081
1082 remove_materialized_leaf(&file).expect("must remove regular file");
1083 assert!(!file.exists(), "regular file must be removed");
1084 }
1085
1086 #[test]
1087 fn materialization_parallelism_stays_sequential_for_small_workloads() {
1088 assert_eq!(materialization_worker_count(31, Some(NonZeroUsize::MIN)), 1);
1089 }
1090
1091 #[test]
1092 fn materialization_parallelism_respects_requested_thread_cap() {
1093 assert_eq!(materialization_worker_count(128, NonZeroUsize::new(4)), 4);
1094 }
1095
1096 #[test]
1097 fn materialize_write_ops_prepares_missing_parent_directories() {
1098 let temp_dir = TempDir::new().unwrap();
1099 let repo = Repository::init_default(temp_dir.path()).unwrap();
1100
1101 let blob = Blob::from("cold pull payload");
1102 let hash = repo.store().put_blob(&blob).unwrap();
1103 let file_path = temp_dir.path().join("nested/deep/file.txt");
1104
1105 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1106 path: file_path.clone(),
1107 hash,
1108 executable: false,
1109 }])
1110 .unwrap();
1111
1112 assert_eq!(
1113 std::fs::read_to_string(&file_path).unwrap(),
1114 "cold pull payload"
1115 );
1116 }
1117
1118 #[test]
1125 #[cfg(unix)]
1126 fn materialized_blob_uses_normal_writable_mode() {
1127 use std::os::unix::fs::PermissionsExt;
1128
1129 let temp_dir = TempDir::new().unwrap();
1130 let repo = Repository::init_default(temp_dir.path()).unwrap();
1131
1132 let blob = Blob::from("normal mode payload");
1133 let hash = repo.store().put_blob(&blob).unwrap();
1134 let regular = temp_dir.path().join("worktree/file.txt");
1135 let exec = temp_dir.path().join("worktree/run.sh");
1136
1137 repo.materialize_write_ops(&[
1138 WorktreeWriteOp::Blob {
1139 path: regular.clone(),
1140 hash,
1141 executable: false,
1142 },
1143 WorktreeWriteOp::Blob {
1144 path: exec.clone(),
1145 hash,
1146 executable: true,
1147 },
1148 ])
1149 .unwrap();
1150
1151 let regular_mode = std::fs::metadata(®ular).unwrap().permissions().mode() & 0o777;
1152 let exec_mode = std::fs::metadata(&exec).unwrap().permissions().mode() & 0o777;
1153 assert_eq!(
1154 regular_mode, 0o644,
1155 "regular blob must be 0o644 (got 0o{:o})",
1156 regular_mode
1157 );
1158 assert_eq!(
1159 exec_mode, 0o755,
1160 "executable blob must be 0o755 (got 0o{:o})",
1161 exec_mode
1162 );
1163
1164 std::fs::write(®ular, b"agent edits this").unwrap();
1167 assert_eq!(std::fs::read(®ular).unwrap(), b"agent edits this");
1168 }
1169
1170 #[test]
1178 #[cfg(unix)]
1179 fn materialize_then_chmod_and_write_does_not_affect_sibling_worktree() {
1180 use std::os::unix::fs::PermissionsExt;
1181
1182 let temp_dir = TempDir::new().unwrap();
1183 let repo = Repository::init_default(temp_dir.path()).unwrap();
1184
1185 let blob = Blob::from("canonical bytes that must never change");
1186 let hash = repo.store().put_blob(&blob).unwrap();
1187
1188 let worktree_a = temp_dir.path().join("wt-a/file.txt");
1189 let worktree_b = temp_dir.path().join("wt-b/file.txt");
1190
1191 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1192 path: worktree_a.clone(),
1193 hash,
1194 executable: false,
1195 }])
1196 .unwrap();
1197 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1198 path: worktree_b.clone(),
1199 hash,
1200 executable: false,
1201 }])
1202 .unwrap();
1203
1204 std::fs::set_permissions(&worktree_a, std::fs::Permissions::from_mode(0o644)).unwrap();
1209 std::fs::write(&worktree_a, b"AGENT_TAMPERED_WITH_WORKTREE_A").unwrap();
1210
1211 assert_eq!(
1213 std::fs::read(&worktree_b).unwrap(),
1214 blob.content(),
1215 "sibling worktree must keep canonical bytes despite in-place write to worktree-a"
1216 );
1217 if let Some(loose) = repo.store().loose_blob_path(&hash) {
1219 assert_eq!(
1220 std::fs::read(&loose).unwrap(),
1221 blob.content(),
1222 "canonical loose blob must keep canonical bytes despite in-place write to worktree-a"
1223 );
1224 }
1225 }
1226
1227 #[test]
1232 #[cfg(unix)]
1233 fn materialize_atomic_rename_does_not_affect_sibling_worktree() {
1234 let temp_dir = TempDir::new().unwrap();
1235 let repo = Repository::init_default(temp_dir.path()).unwrap();
1236
1237 let blob = Blob::from("atomic-rename canonical bytes");
1238 let hash = repo.store().put_blob(&blob).unwrap();
1239
1240 let worktree_a = temp_dir.path().join("wt-a/file.txt");
1241 let worktree_b = temp_dir.path().join("wt-b/file.txt");
1242
1243 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1244 path: worktree_a.clone(),
1245 hash,
1246 executable: false,
1247 }])
1248 .unwrap();
1249 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1250 path: worktree_b.clone(),
1251 hash,
1252 executable: false,
1253 }])
1254 .unwrap();
1255
1256 let tmp = temp_dir.path().join("wt-a/file.txt.tmp");
1257 std::fs::write(&tmp, b"NEW_CONTENT_VIA_ATOMIC_RENAME").unwrap();
1258 std::fs::rename(&tmp, &worktree_a).unwrap();
1259
1260 assert_eq!(
1261 std::fs::read(&worktree_a).unwrap(),
1262 b"NEW_CONTENT_VIA_ATOMIC_RENAME"
1263 );
1264 assert_eq!(
1265 std::fs::read(&worktree_b).unwrap(),
1266 blob.content(),
1267 "sibling worktree must keep canonical bytes despite atomic rename in worktree-a"
1268 );
1269 }
1270
1271 #[test]
1282 #[cfg(unix)]
1283 fn materialize_uses_reflink_when_filesystem_supports_it() {
1284 use std::os::unix::fs::MetadataExt;
1285
1286 let temp_dir = TempDir::new().unwrap();
1287 if !filesystem_supports_reflink(temp_dir.path()) {
1288 eprintln!(
1289 "[skip] filesystem at {:?} does not advertise reflink support",
1290 temp_dir.path()
1291 );
1292 return;
1293 }
1294
1295 let repo = Repository::init_default(temp_dir.path()).unwrap();
1296 let blob = Blob::from("reflink correctness check, kept under compression threshold");
1297 let hash = repo.store().put_blob(&blob).unwrap();
1298 let worktree = temp_dir.path().join("wt/file.txt");
1299
1300 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1301 path: worktree.clone(),
1302 hash,
1303 executable: false,
1304 }])
1305 .unwrap();
1306
1307 let loose = repo
1308 .store()
1309 .loose_blob_path(&hash)
1310 .expect("blob must be loose+uncompressed (under threshold)");
1311 let loose_inode = std::fs::metadata(&loose).unwrap().ino();
1312 let worktree_inode = std::fs::metadata(&worktree).unwrap().ino();
1313 assert_ne!(
1314 loose_inode, worktree_inode,
1315 "reflinked worktree file must have a distinct inode from canonical loose blob (got {} for both — that's a hardlink, the bug we fixed)",
1316 loose_inode
1317 );
1318 let nlink = std::fs::metadata(&loose).unwrap().nlink();
1320 assert_eq!(
1321 nlink, 1,
1322 "canonical loose blob must not be aliased (nlink={}); reflinks share blocks, not inodes",
1323 nlink
1324 );
1325 }
1326
1327 #[test]
1334 #[cfg(unix)]
1335 fn materialize_blob_into_two_worktrees_reads_back_canonical_bytes() {
1336 let temp_dir = TempDir::new().unwrap();
1337 let repo = Repository::init_default(temp_dir.path()).unwrap();
1338
1339 let blob = Blob::from("two-worktree readback payload");
1340 let hash = repo.store().put_blob(&blob).unwrap();
1341
1342 let worktree_a = temp_dir.path().join("worktree-a/file.txt");
1343 let worktree_b = temp_dir.path().join("worktree-b/file.txt");
1344
1345 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1346 path: worktree_a.clone(),
1347 hash,
1348 executable: false,
1349 }])
1350 .unwrap();
1351 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1352 path: worktree_b.clone(),
1353 hash,
1354 executable: false,
1355 }])
1356 .unwrap();
1357
1358 assert_eq!(std::fs::read(&worktree_a).unwrap(), blob.content());
1359 assert_eq!(std::fs::read(&worktree_b).unwrap(), blob.content());
1360 }
1361
1362 #[test]
1368 #[cfg(unix)]
1369 fn materialize_symlink_op_produces_real_symlink_not_hardlink() {
1370 let temp_dir = TempDir::new().unwrap();
1371 let repo = Repository::init_default(temp_dir.path()).unwrap();
1372
1373 let symlink_blob = Blob::new(b"../canonical".to_vec());
1374 let symlink_hash = repo.store().put_blob(&symlink_blob).unwrap();
1375 let path = temp_dir.path().join("worktree/link.txt");
1376
1377 repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1378 path: path.clone(),
1379 hash: symlink_hash,
1380 validation_root: temp_dir.path().to_path_buf(),
1381 }])
1382 .unwrap();
1383
1384 let meta = std::fs::symlink_metadata(&path).unwrap();
1385 assert!(
1386 meta.file_type().is_symlink(),
1387 "Symlink op must produce a real symlink, not a hardlinked regular file"
1388 );
1389 assert_eq!(
1390 std::fs::read_link(&path).unwrap(),
1391 PathBuf::from("../canonical")
1392 );
1393 }
1394
1395 #[test]
1396 #[cfg(unix)]
1397 fn materialize_symlink_op_replaces_existing_symlink() {
1398 let temp_dir = TempDir::new().unwrap();
1399 let repo = Repository::init_default(temp_dir.path()).unwrap();
1400
1401 let first_hash = repo.store().put_blob(&Blob::from("first")).unwrap();
1402 let second_hash = repo.store().put_blob(&Blob::from("second")).unwrap();
1403 let path = temp_dir.path().join("worktree/link.txt");
1404
1405 repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1406 path: path.clone(),
1407 hash: first_hash,
1408 validation_root: temp_dir.path().to_path_buf(),
1409 }])
1410 .unwrap();
1411 repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1412 path: path.clone(),
1413 hash: second_hash,
1414 validation_root: temp_dir.path().to_path_buf(),
1415 }])
1416 .unwrap();
1417
1418 assert_eq!(std::fs::read_link(&path).unwrap(), PathBuf::from("second"));
1419 }
1420
/// A blob op and a symlink op that share one nested parent directory must
/// both be written by a single `materialize_write_ops` call.
#[test]
#[cfg(unix)]
fn materialize_write_ops_reuses_prepared_parent_for_multiple_writes() {
    let sandbox = TempDir::new().unwrap();
    let repo = Repository::init_default(sandbox.path()).unwrap();

    let link_payload = Blob::new(b"../target.txt".to_vec());
    let file_hash = repo.store().put_blob(&Blob::from("target")).unwrap();
    let link_hash = repo.store().put_blob(&link_payload).unwrap();

    // Both leaves live under the same nested directory.
    let shared_parent = sandbox.path().join("nested/deep");
    let file_path = shared_parent.join("target.txt");
    let link_path = shared_parent.join("link.txt");

    let ops = [
        WorktreeWriteOp::Blob {
            path: file_path.clone(),
            hash: file_hash,
            executable: false,
        },
        WorktreeWriteOp::Symlink {
            path: link_path.clone(),
            hash: link_hash,
            validation_root: sandbox.path().to_path_buf(),
        },
    ];
    repo.materialize_write_ops(&ops).unwrap();

    let file_text = std::fs::read_to_string(&file_path).unwrap();
    assert_eq!(file_text, "target");

    let link_target = std::fs::read_link(&link_path).unwrap();
    assert_eq!(link_target, PathBuf::from("../target.txt"));
}
1454
/// Packs and prunes a stored blob so that no loose copy remains, then
/// materializes it into two worktree paths. Both files, and the canonical
/// loose path that must exist again afterwards, have to hold the blob's
/// content.
#[test]
#[cfg(unix)]
fn lazy_promotion_after_pack_and_prune_restores_loose_mirror() {
    let sandbox = TempDir::new().unwrap();
    let repo = Repository::init_default(sandbox.path()).unwrap();

    let blob = Blob::from(
        "lazy-promotion payload, packed-then-pruned, kept under compression threshold",
    );
    let hash = repo.store().put_blob(&blob).unwrap();

    // Move the blob into a pack and drop the loose copy.
    repo.store().pack_objects(false).unwrap();
    repo.store().prune_loose_objects().unwrap();
    let loose_after_prune = repo.store().loose_blob_path(&hash);
    assert!(
        loose_after_prune.is_none(),
        "after pack+prune, the canonical loose path must be empty"
    );

    let destinations = ["worktree-a/file.txt", "worktree-b/file.txt"]
        .map(|relative| sandbox.path().join(relative));

    // Materialize into every destination first, then verify every destination.
    for destination in &destinations {
        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
            path: destination.clone(),
            hash,
            executable: false,
        }])
        .unwrap();
    }
    for destination in &destinations {
        assert_eq!(std::fs::read(destination).unwrap(), blob.content());
    }

    let canonical = repo
        .store()
        .loose_blob_path(&hash)
        .expect("after lazy promotion the canonical loose path must exist");
    assert_eq!(std::fs::read(&canonical).unwrap(), blob.content());
}
1508
/// Snapshots four files, packs and prunes the store, and then runs
/// `warm_canonical_store_for_state`. Every blob reachable from the snapshot's
/// tree must go from having no loose path to having one, with no errors
/// reported, and two materialized worktrees must read back identically.
#[test]
#[cfg(unix)]
fn proactive_warm_promotes_all_state_blobs() {
    let sandbox = TempDir::new().unwrap();
    let repo = Repository::init_default(sandbox.path()).unwrap();

    for i in 0..4 {
        let file_path = sandbox.path().join(format!("file-{i}.txt"));
        let payload = format!("warm-pass payload {i} {}", "x".repeat(140));
        std::fs::write(file_path, payload).unwrap();
    }
    let state = repo
        .snapshot(Some("warm-pass test".to_string()), None)
        .unwrap();

    repo.store().pack_objects(false).unwrap();
    repo.store().prune_loose_objects().unwrap();

    // Precondition: nothing reachable from the tree has a loose path yet.
    let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
    let mut blob_hashes = std::collections::BTreeSet::new();
    repo.collect_blob_hashes(&tree, &mut blob_hashes).unwrap();
    for blob_hash in &blob_hashes {
        let loose = repo.store().loose_blob_path(blob_hash);
        assert!(
            loose.is_none(),
            "blob {} should be pack-only before warm",
            blob_hash
        );
    }

    let warm_stats = repo
        .warm_canonical_store_for_state(&state.change_id)
        .unwrap();
    assert_eq!(
        warm_stats.errors, 0,
        "warm pass produced errors: {:?}",
        warm_stats
    );
    assert_eq!(warm_stats.total(), blob_hashes.len());
    assert!(
        warm_stats.promoted >= blob_hashes.len(),
        "expected to promote all {} blobs, got {} (already_loose={})",
        blob_hashes.len(),
        warm_stats.promoted,
        warm_stats.already_loose
    );

    // Postcondition: every blob now has a loose path.
    for blob_hash in &blob_hashes {
        let loose = repo.store().loose_blob_path(blob_hash);
        assert!(
            loose.is_some(),
            "blob {} should be loose+uncompressed after warm",
            blob_hash
        );
    }

    let first_worktree = sandbox.path().join("wt-a");
    let second_worktree = sandbox.path().join("wt-b");
    repo.materialize_tree(&tree, &first_worktree).unwrap();
    repo.materialize_tree(&tree, &second_worktree).unwrap();

    for entry in tree.entries() {
        let bytes_in_first = std::fs::read(first_worktree.join(&entry.name)).unwrap();
        let bytes_in_second = std::fs::read(second_worktree.join(&entry.name)).unwrap();
        assert_eq!(
            bytes_in_first, bytes_in_second,
            "{} must read back identically across worktrees",
            entry.name
        );
    }
}
1589
/// Running `warm_canonical_store_for_state` twice on the same state must
/// report the same blob total both times, and the second run must promote
/// nothing, count every blob as already loose, and report no errors.
#[test]
#[cfg(unix)]
fn warm_canonical_store_is_idempotent() {
    let sandbox = TempDir::new().unwrap();
    let repo = Repository::init_default(sandbox.path()).unwrap();

    for i in 0..3 {
        let file_path = sandbox.path().join(format!("idem-{i}.txt"));
        let payload = format!("idem payload {i} {}", "x".repeat(160));
        std::fs::write(file_path, payload).unwrap();
    }
    let state = repo
        .snapshot(Some("idempotent warm".to_string()), None)
        .unwrap();
    repo.store().pack_objects(false).unwrap();
    repo.store().prune_loose_objects().unwrap();

    // The same warm call is issued twice, back to back.
    let warm_once = || {
        repo.warm_canonical_store_for_state(&state.change_id)
            .unwrap()
    };
    let initial_pass = warm_once();
    let repeat_pass = warm_once();

    assert_eq!(
        initial_pass.total(),
        repeat_pass.total(),
        "blob count must be stable"
    );
    assert_eq!(
        repeat_pass.promoted, 0,
        "second warm must not promote anything (got {})",
        repeat_pass.promoted
    );
    assert_eq!(
        repeat_pass.already_loose,
        repeat_pass.total(),
        "every blob must be already_loose on second pass"
    );
    assert_eq!(repeat_pass.errors, 0);
}
1631
/// Returns early when `filesystem_supports_reflink` reports false for the
/// temp directory. Otherwise: snapshot five files, pack + prune, warm the
/// store, materialize the tree into six worktrees, and check that every
/// worktree file has its own inode and that none of those inodes belongs to
/// a canonical loose blob.
#[test]
#[cfg(unix)]
fn packed_repo_storage_win_after_warm_and_materialize() {
    use std::{collections::HashSet, os::unix::fs::MetadataExt};

    let sandbox = TempDir::new().unwrap();
    if !filesystem_supports_reflink(sandbox.path()) {
        eprintln!(
            "[skip] filesystem at {:?} does not support reflinks; storage-win test is reflink-specific",
            sandbox.path()
        );
        return;
    }

    let repo = Repository::init_default(sandbox.path()).unwrap();

    let blob_count = 5;
    for i in 0..blob_count {
        let file_path = sandbox.path().join(format!("file-{i}.txt"));
        let payload = format!("packed-storage-win payload {i} {}", "x".repeat(140 + i * 8));
        std::fs::write(file_path, payload).unwrap();
    }
    let state = repo
        .snapshot(Some("packed storage win".to_string()), None)
        .unwrap();
    repo.store().pack_objects(false).unwrap();
    repo.store().prune_loose_objects().unwrap();

    let warm_stats = repo
        .warm_canonical_store_for_state(&state.change_id)
        .unwrap();
    assert_eq!(warm_stats.errors, 0);

    // Materialize the same tree several times and remember every file path.
    let n_worktrees = 6;
    let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
    let mut all_paths = Vec::new();
    for w in 0..n_worktrees {
        let worktree = sandbox.path().join(format!("wt-{w}"));
        repo.materialize_tree(&tree, &worktree).unwrap();
        all_paths.extend((0..blob_count).map(|i| worktree.join(format!("file-{i}.txt"))));
    }

    // One distinct inode per worktree file.
    let worktree_inodes: HashSet<u64> = all_paths
        .iter()
        .map(|path| std::fs::metadata(path).unwrap().ino())
        .collect();
    assert_eq!(
        worktree_inodes.len(),
        all_paths.len(),
        "every reflinked worktree file must have its own inode (got {} for {} files)",
        worktree_inodes.len(),
        all_paths.len()
    );

    // Inodes of whichever tree entries currently have a canonical loose file.
    let canonical_inodes: HashSet<u64> = tree
        .entries()
        .iter()
        .filter_map(|entry| repo.store().loose_blob_path(&entry.hash))
        .map(|loose| std::fs::metadata(&loose).unwrap().ino())
        .collect();
    for inode in &worktree_inodes {
        assert!(
            !canonical_inodes.contains(inode),
            "worktree file inode {} aliases the canonical loose blob — that's the hardlink bug",
            inode
        );
    }

    eprintln!(
        "[packed-storage-win] n_worktrees={} blobs/tree={} reflink_path_confirmed=true",
        n_worktrees, blob_count
    );
}
1727
/// For a freshly stored blob that already has a loose path,
/// `promote_to_loose_uncompressed` must succeed and return `false`.
#[test]
fn promote_to_loose_uncompressed_idempotent_on_loose_blob() {
    let sandbox = TempDir::new().unwrap();
    let repo = Repository::init_default(sandbox.path()).unwrap();

    let payload = Blob::from("idempotent promote payload");
    let hash = repo.store().put_blob(&payload).unwrap();

    // Precondition: the blob is reachable through a loose path right after the put.
    let loose_before = repo.store().loose_blob_path(&hash);
    assert!(loose_before.is_some());

    let promoted = repo.store().promote_to_loose_uncompressed(&hash).unwrap();
    assert!(
        !promoted,
        "promote on already-loose+uncompressed blob must be a no-op"
    );
}
1747
/// `promote_to_loose_uncompressed` must return `Err` when handed a hash that
/// was computed locally and never written to the store.
#[test]
fn promote_to_loose_uncompressed_returns_error_for_missing_blob() {
    use objects::object::ContentHash;

    let sandbox = TempDir::new().unwrap();
    let repo = Repository::init_default(sandbox.path()).unwrap();

    // Hash of content that no `put_blob` call in this test ever stored.
    let never_stored = ContentHash::compute_typed("blob", b"never-stored");
    let outcome = repo.store().promote_to_loose_uncompressed(&never_stored);
    assert!(
        outcome.is_err(),
        "promote on missing blob must error, got {:?}",
        outcome
    );
}
1767}