1use std::{
5 collections::BTreeSet,
6 fs,
7 num::NonZeroUsize,
8 path::{Path, PathBuf},
9 sync::atomic::{AtomicBool, Ordering},
10 thread,
11 time::Instant,
12};
13
14use objects::{
15 fs_atomic::enrich_fs_error,
16 object::{ChangeId, ContentHash, EntryType, Tree},
17};
18use tracing::{debug, instrument};
19
20use super::{HeddleError, Repository, Result, repository_worktree_apply::is_directory_not_empty};
21use crate::{
22 worktree_index::IndexEntry,
23 worktree_walk::{build_cached_entry, cache_key},
24};
25
/// Shared bookkeeping for one batch of worktree writes.
///
/// Every field is an atomic, so one instance can be borrowed by all worker
/// threads of a batch at the same time.
struct MaterializationContext {
    /// Starts `true`; cleared by [`Self::disable_reflinks`].
    reflink_supported: AtomicBool,
    /// Number of calls to [`Self::record_reflink`].
    reflink_count: std::sync::atomic::AtomicUsize,
    /// Number of calls to [`Self::record_copy`].
    copy_count: std::sync::atomic::AtomicUsize,
}

impl MaterializationContext {
    /// Builds a context with reflinks enabled and both counters at zero.
    fn new() -> Self {
        let zeroed = || std::sync::atomic::AtomicUsize::new(0);
        Self {
            reflink_supported: AtomicBool::new(true),
            reflink_count: zeroed(),
            copy_count: zeroed(),
        }
    }

    /// Reports whether reflink attempts are still allowed for this batch.
    fn reflinks_enabled(&self) -> bool {
        let flag = &self.reflink_supported;
        flag.load(Ordering::Relaxed)
    }

    /// Counts one blob that was materialized through a reflink.
    fn record_reflink(&self) {
        let counter = &self.reflink_count;
        counter.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one blob that was materialized through a plain write.
    fn record_copy(&self) {
        let counter = &self.copy_count;
        counter.fetch_add(1, Ordering::Relaxed);
    }

    /// Turns reflink attempts off for the remainder of the batch.
    fn disable_reflinks(&self) {
        let flag = &self.reflink_supported;
        flag.store(false, Ordering::Relaxed);
    }
}
87
/// Batches with fewer write operations than this run on a single worker.
const MATERIALIZE_PARALLEL_THRESHOLD: usize = 32;
/// Environment variable consulted for an explicit worker-count override.
const MATERIALIZE_THREADS_ENV: &str = "HEDDLE_MATERIALIZE_THREADS";
90
/// Work list produced by walking a tree before anything is written to disk.
struct MaterializationPlan {
    /// Destination paths of every subdirectory encountered during planning.
    directories: Vec<PathBuf>,
    /// One context per tree visited (the root tree included).
    directory_contexts: Vec<MaterializedDirectoryContext>,
    /// Blob and symlink writes to perform once directories exist.
    leaves: Vec<WorktreeWriteOp>,
    /// Number of `Blob` leaves planned.
    file_count: usize,
    /// Number of `Symlink` leaves planned.
    symlink_count: usize,
}
98
/// Result of materializing a tree: the per-leaf index seeds and the
/// per-directory contexts collected while planning.
#[derive(Debug)]
pub(crate) struct MaterializedTree {
    /// One seeded entry per blob or symlink that was written.
    pub(crate) file_entries: Vec<SeededWorktreeEntry>,
    /// One context per directory (tree) that was visited.
    pub(crate) directory_contexts: Vec<MaterializedDirectoryContext>,
}
104
/// Index entry captured right after a leaf was written, paired with its key.
#[derive(Debug)]
pub(crate) struct SeededWorktreeEntry {
    /// Cache key derived from the written path (relative to the repository
    /// root when the path lies under it).
    pub(crate) key: String,
    /// Entry built from the freshly written leaf's metadata.
    pub(crate) entry: IndexEntry,
}
110
/// Description of one directory that corresponds to a materialized tree.
#[derive(Debug)]
pub(crate) struct MaterializedDirectoryContext {
    /// Cache key derived from the directory's path relative to the
    /// materialization root.
    pub(crate) key: String,
    /// Destination path of the directory on disk.
    pub(crate) path: PathBuf,
    /// Names of the tree's direct entries.
    pub(crate) child_names: Vec<String>,
    /// Hash of the tree this directory was materialized from.
    pub(crate) tree_hash: ContentHash,
}
118
/// A single leaf write to perform in the worktree.
#[derive(Clone, Debug)]
pub(crate) enum WorktreeWriteOp {
    /// Write a regular file whose bytes come from the blob `hash`.
    Blob {
        /// Destination path of the file.
        path: PathBuf,
        /// Hash of the blob holding the file content.
        hash: ContentHash,
        /// Whether the file should receive the executable mode.
        executable: bool,
    },
    /// Create a symlink whose target is stored in the blob `hash`.
    Symlink {
        /// Destination path of the symlink.
        path: PathBuf,
        /// Hash of the blob holding the link target.
        hash: ContentHash,
    },
}
131
132impl WorktreeWriteOp {
133 pub(crate) fn path(&self) -> &Path {
134 match self {
135 Self::Blob { path, .. } | Self::Symlink { path, .. } => path,
136 }
137 }
138
139 pub(crate) fn hash(&self) -> ContentHash {
140 match self {
141 Self::Blob { hash, .. } | Self::Symlink { hash, .. } => *hash,
142 }
143 }
144
145 pub(crate) fn executable(&self) -> bool {
146 match self {
147 Self::Blob { executable, .. } => *executable,
148 Self::Symlink { .. } => false,
149 }
150 }
151
152 pub(crate) fn index_kind(&self) -> crate::worktree_index::IndexEntryKind {
153 match self {
154 Self::Blob { .. } => crate::worktree_index::IndexEntryKind::File,
155 Self::Symlink { .. } => crate::worktree_index::IndexEntryKind::Symlink,
156 }
157 }
158}
159
/// Counters reported by a warm pass over the canonical store.
#[derive(Debug, Default, Clone, Copy)]
pub struct WarmCanonicalStoreStats {
    /// Blobs for which promotion returned `Ok(true)`.
    pub promoted: usize,
    /// Blobs for which promotion returned `Ok(false)`.
    pub already_loose: usize,
    /// Blobs whose promotion returned an error.
    pub errors: usize,
}

impl WarmCanonicalStoreStats {
    /// Total number of blobs the pass looked at, whatever the outcome.
    pub fn total(&self) -> usize {
        let Self {
            promoted,
            already_loose,
            errors,
        } = *self;
        promoted + already_loose + errors
    }
}
195
196impl Repository {
197 #[instrument(skip(self), fields(state_id = %state_id))]
209 pub fn warm_canonical_store_for_state(
210 &self,
211 state_id: &ChangeId,
212 ) -> Result<WarmCanonicalStoreStats> {
213 self.warm_canonical_store_for_states(std::slice::from_ref(state_id))
214 }
215
216 #[instrument(skip(self, state_ids), fields(state_count = state_ids.len()))]
222 pub fn warm_canonical_store_for_states(
223 &self,
224 state_ids: &[ChangeId],
225 ) -> Result<WarmCanonicalStoreStats> {
226 let mut blob_hashes = BTreeSet::new();
227 for state_id in state_ids {
228 let state = self
229 .store
230 .get_state(state_id)?
231 .ok_or_else(|| HeddleError::NotFound(format!("state {} not in store", state_id)))?;
232 let tree = self.store.get_tree(&state.tree)?.ok_or_else(|| {
233 HeddleError::NotFound(format!("tree {} (for state {})", state.tree, state_id))
234 })?;
235 self.collect_blob_hashes(&tree, &mut blob_hashes)?;
236 }
237
238 let mut stats = WarmCanonicalStoreStats::default();
239 for hash in &blob_hashes {
240 match self.store.promote_to_loose_uncompressed(hash) {
241 Ok(true) => stats.promoted += 1,
242 Ok(false) => stats.already_loose += 1,
243 Err(err) => {
244 debug!(
245 ?err,
246 hash = %hash,
247 "promote_to_loose_uncompressed failed during warm pass"
248 );
249 stats.errors += 1;
250 }
251 }
252 }
253
254 debug!(
255 promoted = stats.promoted,
256 already_loose = stats.already_loose,
257 errors = stats.errors,
258 "Warm canonical store pass complete"
259 );
260
261 Ok(stats)
262 }
263
264 fn collect_blob_hashes(&self, tree: &Tree, out: &mut BTreeSet<ContentHash>) -> Result<()> {
265 for entry in tree.entries() {
266 match entry.entry_type {
277 EntryType::Blob | EntryType::Symlink => {
278 out.insert(entry.hash);
279 }
280 EntryType::Tree => {
281 let subtree = self
282 .store
283 .get_tree(&entry.hash)?
284 .ok_or_else(|| HeddleError::NotFound(format!("tree {}", entry.hash)))?;
285 self.collect_blob_hashes(&subtree, out)?;
286 }
287 }
288 }
289 Ok(())
290 }
291
292 #[instrument(skip(self, tree), fields(dir = %dir.display(), entries = tree.len()))]
294 pub fn materialize_tree(&self, tree: &Tree, dir: &Path) -> Result<()> {
295 self.materialize_tree_seeded(tree, dir).map(|_| ())
296 }
297
298 pub(crate) fn materialize_tree_seeded(
299 &self,
300 tree: &Tree,
301 dir: &Path,
302 ) -> Result<MaterializedTree> {
303 let plan_start = Instant::now();
304 let mut plan = MaterializationPlan {
305 directories: Vec::new(),
306 directory_contexts: Vec::new(),
307 leaves: Vec::new(),
308 file_count: 0,
309 symlink_count: 0,
310 };
311 self.plan_materialization(tree, Path::new(""), dir, &mut plan)?;
312 let plan_duration_ms = plan_start.elapsed().as_millis();
313
314 let execution_start = Instant::now();
315 let requested_threads = requested_materialization_threads();
316 fs::create_dir_all(dir)
317 .map_err(|e| HeddleError::Io(enrich_fs_error(dir, "creating", e)))?;
318 for directory in &plan.directories {
319 fs::create_dir_all(directory)
320 .map_err(|e| HeddleError::Io(enrich_fs_error(directory, "creating", e)))?;
321 }
322
323 let (worker_count, file_entries) = self.materialize_write_ops_seeded(&plan.leaves)?;
324
325 debug!(
326 directories = plan.directories.len(),
327 files = plan.file_count,
328 symlinks = plan.symlink_count,
329 workers = worker_count,
330 requested_workers = requested_threads.map(NonZeroUsize::get),
331 plan_duration_ms,
332 execution_duration_ms = execution_start.elapsed().as_millis(),
333 parallel = worker_count > 1,
334 "Tree materialization complete"
335 );
336
337 Ok(MaterializedTree {
338 file_entries,
339 directory_contexts: plan.directory_contexts,
340 })
341 }
342
343 fn plan_materialization(
344 &self,
345 tree: &Tree,
346 rel_dir: &Path,
347 dir: &Path,
348 plan: &mut MaterializationPlan,
349 ) -> Result<()> {
350 plan.directory_contexts.push(MaterializedDirectoryContext {
351 key: cache_key(rel_dir),
352 path: dir.to_path_buf(),
353 child_names: tree
354 .entries()
355 .iter()
356 .map(|entry| entry.name.clone())
357 .collect(),
358 tree_hash: tree.hash(),
359 });
360
361 for entry in tree.entries() {
362 let path = dir.join(&entry.name);
363 let rel_path = rel_dir.join(&entry.name);
364 let is_symlink = entry.entry_type == EntryType::Symlink
372 || entry.mode == objects::object::FileMode::Symlink;
373 if is_symlink {
374 plan.symlink_count += 1;
375 plan.leaves.push(WorktreeWriteOp::Symlink {
376 path,
377 hash: entry.hash,
378 });
379 continue;
380 }
381 match entry.entry_type {
382 EntryType::Blob => {
383 plan.file_count += 1;
384 plan.leaves.push(WorktreeWriteOp::Blob {
385 path,
386 hash: entry.hash,
387 executable: entry.is_executable(),
388 });
389 }
390 EntryType::Tree => {
391 let subtree = self
392 .store
393 .get_tree(&entry.hash)?
394 .ok_or_else(|| HeddleError::NotFound(format!("tree {}", entry.hash)))?;
395 plan.directories.push(path.clone());
396 self.plan_materialization(&subtree, &rel_path, &path, plan)?;
397 }
398 EntryType::Symlink => {
399 unreachable!(
401 "EntryType::Symlink should have been routed by the is_symlink guard"
402 );
403 }
404 }
405 }
406
407 Ok(())
408 }
409
410 pub(crate) fn materialize_write_ops(&self, writes: &[WorktreeWriteOp]) -> Result<usize> {
411 self.materialize_write_ops_seeded(writes)
412 .map(|(worker_count, _)| worker_count)
413 }
414
415 pub(crate) fn materialize_write_ops_seeded(
416 &self,
417 writes: &[WorktreeWriteOp],
418 ) -> Result<(usize, Vec<SeededWorktreeEntry>)> {
419 prepare_parent_directories(writes)?;
420
421 let requested_threads = requested_materialization_threads();
422 let worker_count = materialization_worker_count(writes.len(), requested_threads);
423
424 let context = MaterializationContext::new();
432
433 let result = if worker_count <= 1 {
434 let mut seeded = Vec::with_capacity(writes.len());
435 for write in writes {
436 seeded.push(self.materialize_write_op(write, &context)?);
437 }
438 Ok((worker_count, seeded))
439 } else {
440 let chunk_size = writes.len().div_ceil(worker_count);
441 let seeded = thread::scope(|scope| -> Result<Vec<SeededWorktreeEntry>> {
442 let mut workers = Vec::new();
443 let context = &context;
444 for chunk in writes.chunks(chunk_size) {
445 workers.push(scope.spawn(move || -> Result<Vec<SeededWorktreeEntry>> {
446 let mut seeded = Vec::with_capacity(chunk.len());
447 for write in chunk {
448 seeded.push(self.materialize_write_op(write, context)?);
449 }
450 Ok(seeded)
451 }));
452 }
453
454 let mut seeded = Vec::with_capacity(writes.len());
455 for worker in workers {
456 seeded.extend(worker.join().map_err(|_| {
457 HeddleError::Config("materialization worker panicked".to_string())
458 })??);
459 }
460
461 Ok(seeded)
462 })?;
463
464 Ok((worker_count, seeded))
465 };
466
467 let reflinks = context.reflink_count.load(Ordering::Relaxed);
468 let copies = context.copy_count.load(Ordering::Relaxed);
469 if reflinks + copies > 0 {
470 debug!(
471 reflinks,
472 copies,
473 reflinks_enabled = context.reflinks_enabled(),
474 "Materialized blobs"
475 );
476 }
477
478 result
479 }
480
481 fn materialize_write_op(
482 &self,
483 write: &WorktreeWriteOp,
484 context: &MaterializationContext,
485 ) -> Result<SeededWorktreeEntry> {
486 match write {
487 WorktreeWriteOp::Blob {
488 path,
489 hash,
490 executable,
491 } => {
492 self.materialize_blob(path, hash, *executable, context)?;
493 }
494 WorktreeWriteOp::Symlink { path, hash } => {
495 let blob = self
496 .store
497 .get_blob(hash)?
498 .ok_or_else(|| HeddleError::NotFound(format!("blob {}", hash)))?;
499 #[cfg(unix)]
500 {
501 let target = std::str::from_utf8(blob.content()).map_err(|_| {
502 HeddleError::InvalidObject("invalid symlink target".to_string())
503 })?;
504 remove_materialized_leaf(path)?;
505 std::os::unix::fs::symlink(target, path)?;
506 }
507 #[cfg(not(unix))]
508 let _ = blob;
509 }
510 }
511
512 let metadata = fs::symlink_metadata(write.path())?;
513 let entry = build_cached_entry(
514 write.hash(),
515 &metadata,
516 write.executable(),
517 write.index_kind(),
518 )
519 .ok_or_else(|| {
520 HeddleError::Config(format!(
521 "seed materialized worktree entry for {}",
522 write.path().display()
523 ))
524 })?;
525
526 Ok(SeededWorktreeEntry {
527 key: cache_key(
528 write
529 .path()
530 .strip_prefix(self.root())
531 .unwrap_or(write.path()),
532 ),
533 entry,
534 })
535 }
536
537 fn materialize_blob(
563 &self,
564 dest: &Path,
565 hash: &ContentHash,
566 executable: bool,
567 context: &MaterializationContext,
568 ) -> Result<()> {
569 if let Some(stub) = self
579 .redaction_stub_for_blob(hash)
580 .map_err(|err| HeddleError::Config(format!("redaction lookup failed: {err}")))?
581 {
582 let _ = fs::remove_file(dest);
583 fs::write(dest, stub.as_bytes())?;
584 set_file_mode(dest, false)?;
588 context.record_copy();
591 let _ = executable;
592 return Ok(());
593 }
594
595 if context.reflinks_enabled() {
596 if let Some(source) = self.store.loose_blob_path(hash)
598 && self.try_clone(&source, dest, executable, context)?
599 {
600 return Ok(());
601 }
602 match self.store.promote_to_loose_uncompressed(hash) {
614 Ok(_) => {
615 if let Some(source) = self.store.loose_blob_path(hash)
616 && self.try_clone(&source, dest, executable, context)?
617 {
618 return Ok(());
619 }
620 }
621 Err(err) => {
622 debug!(
623 ?err,
624 hash = %hash,
625 "promote_to_loose_uncompressed failed; falling back to fs::write"
626 );
627 }
628 }
629 }
630
631 let blob = self
632 .store
633 .get_blob(hash)?
634 .ok_or_else(|| HeddleError::NotFound(format!("blob {}", hash)))?;
635 let _ = fs::remove_file(dest);
640 fs::write(dest, blob.content())?;
641 set_file_mode(dest, executable)?;
642 context.record_copy();
643 Ok(())
644 }
645
646 fn try_clone(
654 &self,
655 source: &Path,
656 dest: &Path,
657 executable: bool,
658 context: &MaterializationContext,
659 ) -> Result<bool> {
660 let _ = fs::remove_file(dest);
664 match objects::fs_clone::try_reflink(source, dest) {
665 Ok(true) => {
666 set_file_mode(dest, executable)?;
667 context.record_reflink();
668 Ok(true)
669 }
670 Ok(false) => {
671 debug!(
676 source = %source.display(),
677 dest = %dest.display(),
678 "reflink not supported on this filesystem; switching batch to fs::write fallback"
679 );
680 context.disable_reflinks();
681 Ok(false)
682 }
683 Err(err) => {
684 debug!(
685 ?err,
686 source = %source.display(),
687 dest = %dest.display(),
688 "reflink failed with I/O error"
689 );
690 Err(err.into())
691 }
692 }
693 }
694}
695
696fn prepare_parent_directories(writes: &[WorktreeWriteOp]) -> Result<()> {
697 let mut parents = BTreeSet::new();
698 for write in writes {
699 if let Some(parent) = write.path().parent() {
700 parents.insert(parent.to_path_buf());
701 }
702 }
703
704 for parent in parents {
705 fs::create_dir_all(&parent)
706 .map_err(|e| HeddleError::Io(enrich_fs_error(&parent, "creating", e)))?;
707 }
708
709 Ok(())
710}
711
712fn remove_materialized_leaf(path: &Path) -> Result<()> {
724 match fs::symlink_metadata(path) {
725 Ok(metadata) => {
726 let file_type = metadata.file_type();
727 if file_type.is_symlink() || file_type.is_file() {
728 fs::remove_file(path)
729 .map_err(|e| HeddleError::Io(enrich_fs_error(path, "removing", e)))?;
730 } else if file_type.is_dir() {
731 match fs::remove_dir(path) {
732 Ok(()) => {}
733 Err(error) if is_directory_not_empty(&error) => {}
734 Err(error) => {
735 return Err(HeddleError::Io(enrich_fs_error(path, "removing", error)));
736 }
737 }
738 }
739 Ok(())
740 }
741 Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(()),
742 Err(error) => Err(HeddleError::Io(enrich_fs_error(path, "inspecting", error))),
743 }
744}
745
746fn set_file_mode(path: &Path, executable: bool) -> Result<()> {
747 #[cfg(unix)]
748 {
749 use std::os::unix::fs::PermissionsExt;
750
751 let mode = if executable { 0o755 } else { 0o644 };
752 fs::set_permissions(path, fs::Permissions::from_mode(mode))?;
753 }
754 #[cfg(not(unix))]
755 {
756 let _ = (path, executable);
757 }
758 Ok(())
759}
760
761fn materialization_worker_count(
762 operation_count: usize,
763 requested_threads: Option<NonZeroUsize>,
764) -> usize {
765 if operation_count < MATERIALIZE_PARALLEL_THRESHOLD {
766 return 1;
767 }
768
769 let available = requested_threads.unwrap_or_else(default_materialization_threads);
770 available.get().min(operation_count.max(1))
771}
772
/// Worker count used when no override was requested: the parallelism the
/// standard library reports, or one when that query fails.
fn default_materialization_threads() -> NonZeroUsize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism,
        Err(_) => NonZeroUsize::MIN,
    }
}
776
777fn requested_materialization_threads() -> Option<NonZeroUsize> {
778 let raw = std::env::var(MATERIALIZE_THREADS_ENV).ok()?;
779 raw.trim().parse::<usize>().ok().and_then(NonZeroUsize::new)
780}
781
782#[cfg(test)]
783mod tests {
784 use std::{num::NonZeroUsize, path::PathBuf};
785
786 use objects::{fs_clone::filesystem_supports_reflink, object::Blob};
787 use tempfile::TempDir;
788
789 use super::{
790 Repository, WorktreeWriteOp, materialization_worker_count, remove_materialized_leaf,
791 };
792
793 #[test]
804 fn remove_materialized_leaf_tolerates_directory_not_empty() {
805 let temp = TempDir::new().unwrap();
806 let dir = temp.path().join("web");
807 std::fs::create_dir_all(dir.join("node_modules/lodash")).unwrap();
808 std::fs::write(dir.join("node_modules/lodash/index.js"), "ignored").unwrap();
809
810 remove_materialized_leaf(&dir).expect("must tolerate ENOTEMPTY");
813 assert!(
814 dir.join("node_modules/lodash/index.js").exists(),
815 "ignored content must survive the tolerated removal"
816 );
817 }
818
819 #[test]
822 fn remove_materialized_leaf_removes_empty_directory() {
823 let temp = TempDir::new().unwrap();
824 let dir = temp.path().join("emptydir");
825 std::fs::create_dir(&dir).unwrap();
826
827 remove_materialized_leaf(&dir).expect("must remove empty dir");
828 assert!(!dir.exists(), "empty directory must be removed");
829 }
830
831 #[test]
833 fn remove_materialized_leaf_is_noop_for_missing_path() {
834 let temp = TempDir::new().unwrap();
835 remove_materialized_leaf(&temp.path().join("does-not-exist"))
836 .expect("missing path must be a no-op");
837 }
838
839 #[test]
842 fn remove_materialized_leaf_removes_regular_file() {
843 let temp = TempDir::new().unwrap();
844 let file = temp.path().join("a.txt");
845 std::fs::write(&file, "content").unwrap();
846
847 remove_materialized_leaf(&file).expect("must remove regular file");
848 assert!(!file.exists(), "regular file must be removed");
849 }
850
851 #[test]
852 fn materialization_parallelism_stays_sequential_for_small_workloads() {
853 assert_eq!(materialization_worker_count(31, Some(NonZeroUsize::MIN)), 1);
854 }
855
856 #[test]
857 fn materialization_parallelism_respects_requested_thread_cap() {
858 assert_eq!(materialization_worker_count(128, NonZeroUsize::new(4)), 4);
859 }
860
861 #[test]
862 fn materialize_write_ops_prepares_missing_parent_directories() {
863 let temp_dir = TempDir::new().unwrap();
864 let repo = Repository::init_default(temp_dir.path()).unwrap();
865
866 let blob = Blob::from("cold pull payload");
867 let hash = repo.store().put_blob(&blob).unwrap();
868 let file_path = temp_dir.path().join("nested/deep/file.txt");
869
870 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
871 path: file_path.clone(),
872 hash,
873 executable: false,
874 }])
875 .unwrap();
876
877 assert_eq!(
878 std::fs::read_to_string(&file_path).unwrap(),
879 "cold pull payload"
880 );
881 }
882
883 #[test]
890 #[cfg(unix)]
891 fn materialized_blob_uses_normal_writable_mode() {
892 use std::os::unix::fs::PermissionsExt;
893
894 let temp_dir = TempDir::new().unwrap();
895 let repo = Repository::init_default(temp_dir.path()).unwrap();
896
897 let blob = Blob::from("normal mode payload");
898 let hash = repo.store().put_blob(&blob).unwrap();
899 let regular = temp_dir.path().join("worktree/file.txt");
900 let exec = temp_dir.path().join("worktree/run.sh");
901
902 repo.materialize_write_ops(&[
903 WorktreeWriteOp::Blob {
904 path: regular.clone(),
905 hash,
906 executable: false,
907 },
908 WorktreeWriteOp::Blob {
909 path: exec.clone(),
910 hash,
911 executable: true,
912 },
913 ])
914 .unwrap();
915
916 let regular_mode = std::fs::metadata(®ular).unwrap().permissions().mode() & 0o777;
917 let exec_mode = std::fs::metadata(&exec).unwrap().permissions().mode() & 0o777;
918 assert_eq!(
919 regular_mode, 0o644,
920 "regular blob must be 0o644 (got 0o{:o})",
921 regular_mode
922 );
923 assert_eq!(
924 exec_mode, 0o755,
925 "executable blob must be 0o755 (got 0o{:o})",
926 exec_mode
927 );
928
929 std::fs::write(®ular, b"agent edits this").unwrap();
932 assert_eq!(std::fs::read(®ular).unwrap(), b"agent edits this");
933 }
934
935 #[test]
943 #[cfg(unix)]
944 fn materialize_then_chmod_and_write_does_not_affect_sibling_worktree() {
945 use std::os::unix::fs::PermissionsExt;
946
947 let temp_dir = TempDir::new().unwrap();
948 let repo = Repository::init_default(temp_dir.path()).unwrap();
949
950 let blob = Blob::from("canonical bytes that must never change");
951 let hash = repo.store().put_blob(&blob).unwrap();
952
953 let worktree_a = temp_dir.path().join("wt-a/file.txt");
954 let worktree_b = temp_dir.path().join("wt-b/file.txt");
955
956 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
957 path: worktree_a.clone(),
958 hash,
959 executable: false,
960 }])
961 .unwrap();
962 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
963 path: worktree_b.clone(),
964 hash,
965 executable: false,
966 }])
967 .unwrap();
968
969 std::fs::set_permissions(&worktree_a, std::fs::Permissions::from_mode(0o644)).unwrap();
974 std::fs::write(&worktree_a, b"AGENT_TAMPERED_WITH_WORKTREE_A").unwrap();
975
976 assert_eq!(
978 std::fs::read(&worktree_b).unwrap(),
979 blob.content(),
980 "sibling worktree must keep canonical bytes despite in-place write to worktree-a"
981 );
982 if let Some(loose) = repo.store().loose_blob_path(&hash) {
984 assert_eq!(
985 std::fs::read(&loose).unwrap(),
986 blob.content(),
987 "canonical loose blob must keep canonical bytes despite in-place write to worktree-a"
988 );
989 }
990 }
991
992 #[test]
997 #[cfg(unix)]
998 fn materialize_atomic_rename_does_not_affect_sibling_worktree() {
999 let temp_dir = TempDir::new().unwrap();
1000 let repo = Repository::init_default(temp_dir.path()).unwrap();
1001
1002 let blob = Blob::from("atomic-rename canonical bytes");
1003 let hash = repo.store().put_blob(&blob).unwrap();
1004
1005 let worktree_a = temp_dir.path().join("wt-a/file.txt");
1006 let worktree_b = temp_dir.path().join("wt-b/file.txt");
1007
1008 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1009 path: worktree_a.clone(),
1010 hash,
1011 executable: false,
1012 }])
1013 .unwrap();
1014 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1015 path: worktree_b.clone(),
1016 hash,
1017 executable: false,
1018 }])
1019 .unwrap();
1020
1021 let tmp = temp_dir.path().join("wt-a/file.txt.tmp");
1022 std::fs::write(&tmp, b"NEW_CONTENT_VIA_ATOMIC_RENAME").unwrap();
1023 std::fs::rename(&tmp, &worktree_a).unwrap();
1024
1025 assert_eq!(
1026 std::fs::read(&worktree_a).unwrap(),
1027 b"NEW_CONTENT_VIA_ATOMIC_RENAME"
1028 );
1029 assert_eq!(
1030 std::fs::read(&worktree_b).unwrap(),
1031 blob.content(),
1032 "sibling worktree must keep canonical bytes despite atomic rename in worktree-a"
1033 );
1034 }
1035
1036 #[test]
1047 #[cfg(unix)]
1048 fn materialize_uses_reflink_when_filesystem_supports_it() {
1049 use std::os::unix::fs::MetadataExt;
1050
1051 let temp_dir = TempDir::new().unwrap();
1052 if !filesystem_supports_reflink(temp_dir.path()) {
1053 eprintln!(
1054 "[skip] filesystem at {:?} does not advertise reflink support",
1055 temp_dir.path()
1056 );
1057 return;
1058 }
1059
1060 let repo = Repository::init_default(temp_dir.path()).unwrap();
1061 let blob = Blob::from("reflink correctness check, kept under compression threshold");
1062 let hash = repo.store().put_blob(&blob).unwrap();
1063 let worktree = temp_dir.path().join("wt/file.txt");
1064
1065 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1066 path: worktree.clone(),
1067 hash,
1068 executable: false,
1069 }])
1070 .unwrap();
1071
1072 let loose = repo
1073 .store()
1074 .loose_blob_path(&hash)
1075 .expect("blob must be loose+uncompressed (under threshold)");
1076 let loose_inode = std::fs::metadata(&loose).unwrap().ino();
1077 let worktree_inode = std::fs::metadata(&worktree).unwrap().ino();
1078 assert_ne!(
1079 loose_inode, worktree_inode,
1080 "reflinked worktree file must have a distinct inode from canonical loose blob (got {} for both — that's a hardlink, the bug we fixed)",
1081 loose_inode
1082 );
1083 let nlink = std::fs::metadata(&loose).unwrap().nlink();
1085 assert_eq!(
1086 nlink, 1,
1087 "canonical loose blob must not be aliased (nlink={}); reflinks share blocks, not inodes",
1088 nlink
1089 );
1090 }
1091
1092 #[test]
1099 #[cfg(unix)]
1100 fn materialize_blob_into_two_worktrees_reads_back_canonical_bytes() {
1101 let temp_dir = TempDir::new().unwrap();
1102 let repo = Repository::init_default(temp_dir.path()).unwrap();
1103
1104 let blob = Blob::from("two-worktree readback payload");
1105 let hash = repo.store().put_blob(&blob).unwrap();
1106
1107 let worktree_a = temp_dir.path().join("worktree-a/file.txt");
1108 let worktree_b = temp_dir.path().join("worktree-b/file.txt");
1109
1110 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1111 path: worktree_a.clone(),
1112 hash,
1113 executable: false,
1114 }])
1115 .unwrap();
1116 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1117 path: worktree_b.clone(),
1118 hash,
1119 executable: false,
1120 }])
1121 .unwrap();
1122
1123 assert_eq!(std::fs::read(&worktree_a).unwrap(), blob.content());
1124 assert_eq!(std::fs::read(&worktree_b).unwrap(), blob.content());
1125 }
1126
1127 #[test]
1133 #[cfg(unix)]
1134 fn materialize_symlink_op_produces_real_symlink_not_hardlink() {
1135 let temp_dir = TempDir::new().unwrap();
1136 let repo = Repository::init_default(temp_dir.path()).unwrap();
1137
1138 let symlink_blob = Blob::new(b"../canonical".to_vec());
1139 let symlink_hash = repo.store().put_blob(&symlink_blob).unwrap();
1140 let path = temp_dir.path().join("worktree/link.txt");
1141
1142 repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1143 path: path.clone(),
1144 hash: symlink_hash,
1145 }])
1146 .unwrap();
1147
1148 let meta = std::fs::symlink_metadata(&path).unwrap();
1149 assert!(
1150 meta.file_type().is_symlink(),
1151 "Symlink op must produce a real symlink, not a hardlinked regular file"
1152 );
1153 assert_eq!(
1154 std::fs::read_link(&path).unwrap(),
1155 PathBuf::from("../canonical")
1156 );
1157 }
1158
1159 #[test]
1160 #[cfg(unix)]
1161 fn materialize_symlink_op_replaces_existing_symlink() {
1162 let temp_dir = TempDir::new().unwrap();
1163 let repo = Repository::init_default(temp_dir.path()).unwrap();
1164
1165 let first_hash = repo.store().put_blob(&Blob::from("first")).unwrap();
1166 let second_hash = repo.store().put_blob(&Blob::from("second")).unwrap();
1167 let path = temp_dir.path().join("worktree/link.txt");
1168
1169 repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1170 path: path.clone(),
1171 hash: first_hash,
1172 }])
1173 .unwrap();
1174 repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1175 path: path.clone(),
1176 hash: second_hash,
1177 }])
1178 .unwrap();
1179
1180 assert_eq!(std::fs::read_link(&path).unwrap(), PathBuf::from("second"));
1181 }
1182
1183 #[test]
1184 #[cfg(unix)]
1185 fn materialize_write_ops_reuses_prepared_parent_for_multiple_writes() {
1186 let temp_dir = TempDir::new().unwrap();
1187 let repo = Repository::init_default(temp_dir.path()).unwrap();
1188
1189 let symlink_target = Blob::new(b"../target.txt".to_vec());
1190 let target_hash = repo.store().put_blob(&Blob::from("target")).unwrap();
1191 let symlink_hash = repo.store().put_blob(&symlink_target).unwrap();
1192 let base_dir = temp_dir.path().join("nested/deep");
1193 let target_path = base_dir.join("target.txt");
1194 let link_path = base_dir.join("link.txt");
1195
1196 repo.materialize_write_ops(&[
1197 WorktreeWriteOp::Blob {
1198 path: target_path.clone(),
1199 hash: target_hash,
1200 executable: false,
1201 },
1202 WorktreeWriteOp::Symlink {
1203 path: link_path.clone(),
1204 hash: symlink_hash,
1205 },
1206 ])
1207 .unwrap();
1208
1209 assert_eq!(std::fs::read_to_string(&target_path).unwrap(), "target");
1210 assert_eq!(
1211 std::fs::read_link(&link_path).unwrap(),
1212 PathBuf::from("../target.txt")
1213 );
1214 }
1215
1216 #[test]
1224 #[cfg(unix)]
1225 fn lazy_promotion_after_pack_and_prune_restores_loose_mirror() {
1226 let temp_dir = TempDir::new().unwrap();
1227 let repo = Repository::init_default(temp_dir.path()).unwrap();
1228
1229 let blob = Blob::from(
1230 "lazy-promotion payload, packed-then-pruned, kept under compression threshold",
1231 );
1232 let hash = repo.store().put_blob(&blob).unwrap();
1233
1234 repo.store().pack_objects(false).unwrap();
1237 repo.store().prune_loose_objects().unwrap();
1238 assert!(
1239 repo.store().loose_blob_path(&hash).is_none(),
1240 "after pack+prune, the canonical loose path must be empty"
1241 );
1242
1243 let worktree_a = temp_dir.path().join("worktree-a/file.txt");
1244 let worktree_b = temp_dir.path().join("worktree-b/file.txt");
1245 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1246 path: worktree_a.clone(),
1247 hash,
1248 executable: false,
1249 }])
1250 .unwrap();
1251 repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1252 path: worktree_b.clone(),
1253 hash,
1254 executable: false,
1255 }])
1256 .unwrap();
1257
1258 assert_eq!(std::fs::read(&worktree_a).unwrap(), blob.content());
1260 assert_eq!(std::fs::read(&worktree_b).unwrap(), blob.content());
1261
1262 let loose = repo
1264 .store()
1265 .loose_blob_path(&hash)
1266 .expect("after lazy promotion the canonical loose path must exist");
1267 assert_eq!(std::fs::read(&loose).unwrap(), blob.content());
1268 }
1269
1270 #[test]
1276 #[cfg(unix)]
1277 fn proactive_warm_promotes_all_state_blobs() {
1278 let temp_dir = TempDir::new().unwrap();
1279 let repo = Repository::init_default(temp_dir.path()).unwrap();
1280
1281 for i in 0..4 {
1283 std::fs::write(
1284 temp_dir.path().join(format!("file-{i}.txt")),
1285 format!("warm-pass payload {i} {}", "x".repeat(140)),
1286 )
1287 .unwrap();
1288 }
1289 let state = repo
1290 .snapshot(Some("warm-pass test".to_string()), None)
1291 .unwrap();
1292
1293 repo.store().pack_objects(false).unwrap();
1295 repo.store().prune_loose_objects().unwrap();
1296
1297 let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
1300 let mut hashes = std::collections::BTreeSet::new();
1301 repo.collect_blob_hashes(&tree, &mut hashes).unwrap();
1302 for hash in &hashes {
1303 assert!(
1304 repo.store().loose_blob_path(hash).is_none(),
1305 "blob {} should be pack-only before warm",
1306 hash
1307 );
1308 }
1309
1310 let stats = repo
1312 .warm_canonical_store_for_state(&state.change_id)
1313 .unwrap();
1314 assert_eq!(stats.errors, 0, "warm pass produced errors: {:?}", stats);
1315 assert_eq!(stats.total(), hashes.len());
1316 assert!(
1317 stats.promoted >= hashes.len(),
1318 "expected to promote all {} blobs, got {} (already_loose={})",
1319 hashes.len(),
1320 stats.promoted,
1321 stats.already_loose
1322 );
1323 for hash in &hashes {
1324 assert!(
1325 repo.store().loose_blob_path(hash).is_some(),
1326 "blob {} should be loose+uncompressed after warm",
1327 hash
1328 );
1329 }
1330
1331 let worktree_a = temp_dir.path().join("wt-a");
1335 let worktree_b = temp_dir.path().join("wt-b");
1336 repo.materialize_tree(&tree, &worktree_a).unwrap();
1337 repo.materialize_tree(&tree, &worktree_b).unwrap();
1338
1339 for entry in tree.entries() {
1340 let path_a = worktree_a.join(&entry.name);
1341 let path_b = worktree_b.join(&entry.name);
1342 assert_eq!(
1343 std::fs::read(&path_a).unwrap(),
1344 std::fs::read(&path_b).unwrap(),
1345 "{} must read back identically across worktrees",
1346 entry.name
1347 );
1348 }
1349 }
1350
/// Warming the canonical store twice for the same state must leave nothing
/// to do on the second pass.
///
/// The test snapshots three small files, packs the store and prunes loose
/// objects, then runs `warm_canonical_store_for_state` twice against the
/// same change id. The second pass must report the same total as the first,
/// zero promotions, every blob as `already_loose`, and zero errors.
#[test]
#[cfg(unix)]
fn warm_canonical_store_is_idempotent() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    let repo = Repository::init_default(root).unwrap();

    // Seed the working directory with three distinct payloads.
    (0..3).for_each(|i| {
        let target = root.join(format!("idem-{i}.txt"));
        let payload = format!("idem payload {i} {}", "x".repeat(160));
        std::fs::write(target, payload).unwrap();
    });

    let state = repo
        .snapshot(Some(String::from("idempotent warm")), None)
        .unwrap();

    // Pack first, then prune the loose objects, before any warm pass runs.
    repo.store().pack_objects(false).unwrap();
    repo.store().prune_loose_objects().unwrap();

    // Both passes go through the exact same call; only the store state differs.
    let warm_pass = || {
        repo.warm_canonical_store_for_state(&state.change_id)
            .unwrap()
    };
    let initial = warm_pass();
    let repeat = warm_pass();

    assert_eq!(initial.total(), repeat.total(), "blob count must be stable");
    assert_eq!(
        repeat.promoted, 0,
        "second warm must not promote anything (got {})",
        repeat.promoted
    );
    assert_eq!(
        repeat.already_loose,
        repeat.total(),
        "every blob must be already_loose on second pass"
    );
    assert_eq!(repeat.errors, 0);
}
1392
/// After packing, pruning and warming, materializing the same tree into
/// several worktrees must give every file its own inode, and none of those
/// inodes may coincide with a loose blob file in the store.
///
/// The test returns early (with a `[skip]` line on stderr) when
/// `filesystem_supports_reflink` reports that the temp directory's
/// filesystem cannot reflink.
#[test]
#[cfg(unix)]
fn packed_repo_storage_win_after_warm_and_materialize() {
    use std::{collections::HashSet, os::unix::fs::MetadataExt};

    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    if !filesystem_supports_reflink(root) {
        eprintln!(
            "[skip] filesystem at {:?} does not support reflinks; storage-win test is reflink-specific",
            root
        );
        return;
    }

    let repo = Repository::init_default(root).unwrap();

    // Five files whose payload length grows with the index, so no two are equal.
    let blob_count = 5;
    for i in 0..blob_count {
        let filler = "x".repeat(140 + i * 8);
        let target = root.join(format!("file-{i}.txt"));
        std::fs::write(
            target,
            format!("packed-storage-win payload {i} {}", filler),
        )
        .unwrap();
    }

    let state = repo
        .snapshot(Some(String::from("packed storage win")), None)
        .unwrap();

    // Pack, then drop the loose copies, then warm the store for this state.
    repo.store().pack_objects(false).unwrap();
    repo.store().prune_loose_objects().unwrap();
    let warm_stats = repo
        .warm_canonical_store_for_state(&state.change_id)
        .unwrap();
    assert_eq!(warm_stats.errors, 0);

    // Materialize the same tree into six sibling worktrees and remember the
    // path of every file we expect to find in each of them.
    let n_worktrees = 6;
    let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
    let mut all_paths = Vec::new();
    for w in 0..n_worktrees {
        let worktree = root.join(format!("wt-{w}"));
        repo.materialize_tree(&tree, &worktree).unwrap();
        all_paths.extend((0..blob_count).map(|i| worktree.join(format!("file-{i}.txt"))));
    }

    // A set collapses duplicates, so a size mismatch means two paths share an inode.
    let inodes: HashSet<u64> = all_paths
        .iter()
        .map(|path| std::fs::metadata(path).unwrap().ino())
        .collect();
    assert_eq!(
        inodes.len(),
        all_paths.len(),
        "every reflinked worktree file must have its own inode (got {} for {} files)",
        inodes.len(),
        all_paths.len()
    );

    // Inodes of the store's loose blob files for the tree's entries.
    // NOTE(review): entries for which `loose_blob_path` returns `None` are
    // skipped silently, so they contribute nothing to this check.
    let canonical_inodes: HashSet<u64> = tree
        .entries()
        .iter()
        .filter_map(|entry| {
            repo.store()
                .loose_blob_path(&entry.hash)
                .map(|loose| std::fs::metadata(&loose).unwrap().ino())
        })
        .collect();

    // A worktree file sharing an inode with a loose blob would be the same
    // on-disk file as the store's copy.
    inodes.iter().for_each(|inode| {
        assert!(
            !canonical_inodes.contains(inode),
            "worktree file inode {} aliases the canonical loose blob — that's the hardlink bug",
            inode
        );
    });

    eprintln!(
        "[packed-storage-win] n_worktrees={} blobs/tree={} reflink_path_confirmed=true",
        n_worktrees, blob_count
    );
}
1488
/// Promoting a blob that `put_blob` has just stored — and for which the store
/// already reports a loose path — must return `false`, i.e. no work done.
#[test]
fn promote_to_loose_uncompressed_idempotent_on_loose_blob() {
    let scratch = TempDir::new().unwrap();
    let repo = Repository::init_default(scratch.path()).unwrap();

    let payload: Blob = "idempotent promote payload".into();
    let hash = repo.store().put_blob(&payload).unwrap();
    // Precondition: the freshly written blob already has a loose path.
    assert!(repo.store().loose_blob_path(&hash).is_some());

    let promoted = repo.store().promote_to_loose_uncompressed(&hash).unwrap();
    assert!(
        !promoted,
        "promote on already-loose+uncompressed blob must be a no-op"
    );
}
1508
/// Promoting a hash that was never written to the store must surface an
/// `Err` rather than succeed.
#[test]
fn promote_to_loose_uncompressed_returns_error_for_missing_blob() {
    use objects::object::ContentHash;

    let scratch = TempDir::new().unwrap();
    let repo = Repository::init_default(scratch.path()).unwrap();

    // A hash computed locally; nothing with this content is ever put in the store.
    let absent = ContentHash::compute_typed("blob", b"never-stored");
    let outcome = repo.store().promote_to_loose_uncompressed(&absent);
    assert!(
        outcome.is_err(),
        "promote on missing blob must error, got {:?}",
        outcome
    );
}
1528}