1use std::fs;
20use std::io::{self, Read};
21use std::path::{Path, PathBuf};
22
23use crate::chunker::{ChunkIterator, FastCdc};
24use crate::hash::Hash;
25use crate::ignore::{self, IgnoreList};
26use crate::index::{self, Index};
27use crate::object::{ChunkedBlob, EntryMode, Object, Tree, TreeEntry};
28use crate::serialize;
29use crate::store::ObjectStore;
30
/// Size cutoff in bytes (1 MiB) for storing file contents: data at or below this length is
/// written as one blob, longer data is split into chunks behind a chunked-blob manifest.
pub const CHUNK_THRESHOLD: u64 = 1 << 20;
33
/// Largest file size in bytes (1 GiB) that the bounded file reader will load; anything
/// larger is reported as [`WorktreeError::FileTooLarge`].
pub const MAX_FILE_BYTES: u64 = 1 << 30;
36
37#[derive(Debug, thiserror::Error)]
39pub enum WorktreeError {
40 #[error("symlink target '{0}' is invalid (absolute or contains '..')")]
42 InvalidSymlinkTarget(String),
43 #[error("file '{0}' exceeds the {MAX_FILE_BYTES} byte limit")]
45 FileTooLarge(PathBuf),
46 #[error("path component is not valid UTF-8")]
48 InvalidUtf8,
49 #[error(transparent)]
51 Io(#[from] io::Error),
52 #[error(transparent)]
54 Object(#[from] crate::object::MkitError),
55 #[error(transparent)]
57 Store(#[from] crate::store::StoreError),
58}
59
60pub type WorktreeResult<T> = Result<T, WorktreeError>;
62
/// Reports whether `target` is acceptable as the stored target of a symlink.
///
/// A target is rejected when it is empty, when it begins with `/`, or when any of its
/// `/`-separated components is exactly `..`. Every other string is accepted.
#[must_use]
pub fn validate_symlink_target(target: &str) -> bool {
    let is_relative = !target.is_empty() && !target.starts_with('/');
    is_relative && target.split('/').all(|component| component != "..")
}
80
81pub fn build_tree(store: &ObjectStore, dir: &Path) -> WorktreeResult<Hash> {
94 build_tree_filtered(store, dir, None)
95}
96
97pub fn build_tree_filtered(
106 store: &ObjectStore,
107 dir: &Path,
108 index: Option<&Index>,
109) -> WorktreeResult<Hash> {
110 let ignores = ignore::load(dir).map_err(|e| match e {
111 crate::ignore::IgnoreError::Io(io) => WorktreeError::Io(io),
112 crate::ignore::IgnoreError::FileTooLarge => {
113 WorktreeError::Io(io::Error::other("ignore file exceeds 1 MiB"))
114 }
115 })?;
116 let loaded;
119 let index = if let Some(i) = index {
120 i
121 } else {
122 loaded = index::read_index(dir).unwrap_or_default();
123 &loaded
124 };
125 build_tree_inner(store, dir, "", &ignores, index, false)
126}
127
128fn build_tree_inner(
134 store: &ObjectStore,
135 dir: &Path,
136 rel_dir: &str,
137 ignores: &IgnoreList,
138 index: &Index,
139 parent_ignored: bool,
140) -> WorktreeResult<Hash> {
141 let mut entries: Vec<TreeEntry> = Vec::new();
142
143 for entry in fs::read_dir(dir)? {
144 let entry = entry?;
145 let file_name = entry.file_name();
146 let name_str = file_name
147 .to_str()
148 .ok_or(WorktreeError::InvalidUtf8)?
149 .to_string();
150 let meta = entry.path().symlink_metadata()?;
152 let is_dir = meta.is_dir();
153 let rel_path = if rel_dir.is_empty() {
154 name_str.clone()
155 } else {
156 format!("{rel_dir}/{name_str}")
157 };
158 let entry_ignored = parent_ignored || ignores.is_ignored(&rel_path, is_dir);
163 if entry_ignored && !index.tracks_path_or_descendant(&rel_path) {
164 continue;
165 }
166
167 let name_bytes = name_str.as_bytes();
168 if !TreeEntry::validate_name(name_bytes) {
169 return Err(WorktreeError::Io(io::Error::new(
170 io::ErrorKind::InvalidInput,
171 format!("invalid tree entry name: {name_str:?}"),
172 )));
173 }
174
175 if meta.file_type().is_file() {
176 let (h, opened_meta) = hash_file_with_metadata(store, &entry.path())?;
177 entries.push(TreeEntry {
178 name: name_str.into_bytes(),
179 mode: entry_mode_from_file_metadata(&opened_meta),
180 object_hash: h,
181 });
182 } else if meta.file_type().is_dir() {
183 if index.has_tracked_file_at(&rel_path) {
189 continue;
190 }
191 let h = build_tree_inner(
192 store,
193 &entry.path(),
194 &rel_path,
195 ignores,
196 index,
197 entry_ignored,
198 )?;
199 entries.push(TreeEntry {
200 name: name_str.into_bytes(),
201 mode: EntryMode::Tree,
202 object_hash: h,
203 });
204 } else if meta.file_type().is_symlink() {
205 let target = fs::read_link(entry.path())?;
206 let target_str = target
207 .to_str()
208 .ok_or(WorktreeError::InvalidUtf8)?
209 .to_string();
210 if !validate_symlink_target(&target_str) {
211 return Err(WorktreeError::InvalidSymlinkTarget(target_str));
212 }
213 let blob = Object::Blob(crate::object::Blob {
214 data: target_str.as_bytes().to_vec(),
215 });
216 let bytes = serialize::serialize(&blob)?;
217 let h = store.write(&bytes)?;
218 entries.push(TreeEntry {
219 name: name_str.into_bytes(),
220 mode: EntryMode::Symlink,
221 object_hash: h,
222 });
223 } else {
224 }
226 }
227
228 entries.sort_by(|a, b| a.name.cmp(&b.name));
229 let tree = Object::Tree(Tree { entries });
230 let bytes = serialize::serialize(&tree)?;
231 Ok(store.write(&bytes)?)
232}
233
234#[allow(clippy::items_after_statements, clippy::too_many_lines)]
255pub fn build_tree_from_index(
256 store: &ObjectStore,
257 index: &crate::index::Index,
258) -> WorktreeResult<Hash> {
259 use crate::index::EntryStatus;
260
261 #[derive(Default)]
264 struct Node {
265 children: std::collections::BTreeMap<String, Node>,
267 leaves: std::collections::BTreeMap<String, (EntryMode, Hash)>,
269 }
270
271 let mut root = Node::default();
272 let mut seen_paths = std::collections::HashSet::with_capacity(index.entries.len());
273
274 for entry in &index.entries {
275 if !seen_paths.insert(entry.path.as_str()) {
276 return Err(WorktreeError::Io(io::Error::other(format!(
277 "duplicate index path: '{}'",
278 entry.path
279 ))));
280 }
281 if entry.status == EntryStatus::Removed {
282 continue;
283 }
284 let mode = match entry.status {
285 EntryStatus::Blob => EntryMode::Blob,
286 EntryStatus::Executable => EntryMode::Executable,
287 EntryStatus::Symlink => EntryMode::Symlink,
288 EntryStatus::Tree => {
289 return Err(WorktreeError::Io(io::Error::other(
293 "index entry uses reserved Tree status (subtree staging not implemented)",
294 )));
295 }
296 EntryStatus::Removed => unreachable!("filtered above"),
297 };
298 match store.read_object(&entry.object_hash)? {
306 Object::Blob(_) => {}
307 Object::ChunkedBlob(_) if mode != EntryMode::Symlink => {}
308 other => {
309 return Err(WorktreeError::Io(io::Error::other(format!(
310 "index entry '{}' points to a non-blob object (got {})",
311 entry.path,
312 other.object_type().name()
313 ))));
314 }
315 }
316
317 let segments: Vec<&str> = entry.path.split('/').collect();
319 let Some((leaf, dirs)) = segments.split_last() else {
320 return Err(WorktreeError::Io(io::Error::other("empty index path")));
321 };
322 if leaf.is_empty() {
323 return Err(WorktreeError::Io(io::Error::other(
324 "trailing slash in index path",
325 )));
326 }
327
328 let mut node = &mut root;
329 let mut walked = String::new();
330 for seg in dirs {
331 if seg.is_empty() {
332 return Err(WorktreeError::Io(io::Error::other(
333 "empty path segment in index",
334 )));
335 }
336 if node.leaves.contains_key(*seg) {
343 let conflicting = if walked.is_empty() {
344 (*seg).to_string()
345 } else {
346 format!("{walked}/{seg}")
347 };
348 return Err(WorktreeError::Io(io::Error::other(format!(
349 "index path conflict: '{conflicting}' is staged as both a file and a directory"
350 ))));
351 }
352 walked = if walked.is_empty() {
353 (*seg).to_string()
354 } else {
355 format!("{walked}/{seg}")
356 };
357 node = node.children.entry((*seg).to_string()).or_default();
358 }
359 if node.children.contains_key(*leaf) {
363 let conflicting = if walked.is_empty() {
364 (*leaf).to_string()
365 } else {
366 format!("{walked}/{leaf}")
367 };
368 return Err(WorktreeError::Io(io::Error::other(format!(
369 "index path conflict: '{conflicting}' is staged as both a file and a directory"
370 ))));
371 }
372 if node
373 .leaves
374 .insert((*leaf).to_string(), (mode, entry.object_hash))
375 .is_some()
376 {
377 let duplicate = if walked.is_empty() {
378 (*leaf).to_string()
379 } else {
380 format!("{walked}/{leaf}")
381 };
382 return Err(WorktreeError::Io(io::Error::other(format!(
383 "duplicate index path: '{duplicate}'"
384 ))));
385 }
386 }
387
388 fn write_node(store: &ObjectStore, node: &Node) -> WorktreeResult<Hash> {
389 let mut entries: Vec<TreeEntry> = Vec::new();
390
391 for (name, child) in &node.children {
393 let h = write_node(store, child)?;
394 let bytes = name.as_bytes().to_vec();
395 if !crate::object::TreeEntry::validate_name(&bytes) {
396 return Err(WorktreeError::Io(io::Error::other(format!(
397 "invalid tree entry name: {name:?}"
398 ))));
399 }
400 entries.push(TreeEntry {
401 name: bytes,
402 mode: EntryMode::Tree,
403 object_hash: h,
404 });
405 }
406
407 for (name, (mode, hash)) in &node.leaves {
409 let bytes = name.as_bytes().to_vec();
410 if !crate::object::TreeEntry::validate_name(&bytes) {
411 return Err(WorktreeError::Io(io::Error::other(format!(
412 "invalid tree entry name: {name:?}"
413 ))));
414 }
415 entries.push(TreeEntry {
416 name: bytes,
417 mode: *mode,
418 object_hash: *hash,
419 });
420 }
421
422 entries.sort_by(|a, b| a.name.cmp(&b.name));
424 let tree = Object::Tree(Tree { entries });
425 let bytes = serialize::serialize(&tree)?;
426 Ok(store.write(&bytes)?)
427 }
428
429 write_node(store, &root)
430}
431
432pub fn hash_file(store: &ObjectStore, path: &Path) -> WorktreeResult<Hash> {
445 hash_file_with_metadata(store, path).map(|(hash, _)| hash)
446}
447
448pub fn read_regular_file_bounded(path: &Path) -> WorktreeResult<(fs::Metadata, Vec<u8>)> {
452 let mut file = open_regular_file(path)?;
453 let meta = file.metadata()?;
454 if !meta.file_type().is_file() {
455 return Err(WorktreeError::Io(io::Error::new(
456 io::ErrorKind::InvalidInput,
457 "path is not a regular file",
458 )));
459 }
460 if meta.len() > MAX_FILE_BYTES {
461 return Err(WorktreeError::FileTooLarge(path.to_path_buf()));
462 }
463 let initial_capacity = usize::try_from(meta.len().min(CHUNK_THRESHOLD))
464 .map_err(|_| WorktreeError::FileTooLarge(path.to_path_buf()))?;
465 let mut data = Vec::with_capacity(initial_capacity);
466 file.by_ref()
467 .take(MAX_FILE_BYTES + 1)
468 .read_to_end(&mut data)?;
469 if u64::try_from(data.len()).unwrap_or(u64::MAX) > MAX_FILE_BYTES {
470 return Err(WorktreeError::FileTooLarge(path.to_path_buf()));
471 }
472 Ok((meta, data))
473}
474
475fn hash_file_with_metadata(
476 store: &ObjectStore,
477 path: &Path,
478) -> WorktreeResult<(Hash, fs::Metadata)> {
479 let (meta, data) = read_regular_file_bounded(path)?;
480 let hash = store_file_object(store, &data)?;
481 Ok((hash, meta))
482}
483
484pub fn store_file_object(store: &ObjectStore, data: &[u8]) -> WorktreeResult<Hash> {
499 if u64::try_from(data.len()).unwrap_or(u64::MAX) <= CHUNK_THRESHOLD {
500 let blob = Object::Blob(crate::object::Blob {
501 data: data.to_vec(),
502 });
503 let bytes = serialize::serialize(&blob)?;
504 return Ok(store.write(&bytes)?);
505 }
506
507 let total_size = data.len() as u64;
513 let chunks: Vec<Hash> = ChunkIterator::new(FastCdc::v1(), data)
514 .map(|b| {
515 let chunk_blob = Object::Blob(crate::object::Blob {
516 data: data[b.offset..b.offset + b.length].to_vec(),
517 });
518 let chunk_bytes = serialize::serialize(&chunk_blob)?;
519 Ok::<_, WorktreeError>(store.write(&chunk_bytes)?)
520 })
521 .collect::<Result<_, _>>()?;
522
523 let manifest = Object::ChunkedBlob(ChunkedBlob {
524 total_size,
525 chunk_size: 0, chunks,
527 });
528 let manifest_bytes = serialize::serialize(&manifest)?;
529 Ok(store.write(&manifest_bytes)?)
530}
531
532pub fn read_blob(store: &ObjectStore, hash: &Hash) -> WorktreeResult<Vec<u8>> {
547 match store.read_object(hash)? {
548 Object::Blob(b) => Ok(b.data),
549 Object::ChunkedBlob(manifest) => {
550 let mut data = Vec::with_capacity(usize::try_from(manifest.total_size).unwrap_or(0));
551 for chunk in &manifest.chunks {
552 match store.read_object(chunk)? {
553 Object::Blob(b) => data.extend_from_slice(&b.data),
554 other => {
555 return Err(WorktreeError::Io(io::Error::other(format!(
556 "chunk {} is not a blob (got {})",
557 crate::hash::to_hex(chunk),
558 other.object_type().name()
559 ))));
560 }
561 }
562 }
563 Ok(data)
564 }
565 other => Err(WorktreeError::Io(io::Error::other(format!(
566 "object {} is not a blob (got {})",
567 crate::hash::to_hex(hash),
568 other.object_type().name()
569 )))),
570 }
571}
572
573#[cfg(unix)]
574fn open_regular_file(path: &Path) -> io::Result<fs::File> {
575 use std::os::unix::fs::OpenOptionsExt;
576
577 fs::OpenOptions::new()
578 .read(true)
579 .custom_flags(libc::O_NOFOLLOW)
580 .open(path)
581}
582
/// Opens `path` for reading after checking, without following symlinks, that it is a
/// regular file.
///
/// The check and the open are two separate steps on this platform.
///
/// # Errors
///
/// Returns `InvalidInput` when `path` is not a regular file, or the underlying error from
/// the metadata lookup or the open.
#[cfg(not(unix))]
fn open_regular_file(path: &Path) -> io::Result<fs::File> {
    if path.symlink_metadata()?.file_type().is_file() {
        fs::File::open(path)
    } else {
        Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "path is not a regular file",
        ))
    }
}
597
598#[cfg(unix)]
599fn entry_mode_from_file_metadata(meta: &fs::Metadata) -> EntryMode {
600 use std::os::unix::fs::PermissionsExt;
601
602 if meta.permissions().mode() & 0o111 != 0 {
603 EntryMode::Executable
604 } else {
605 EntryMode::Blob
606 }
607}
608
/// Maps file metadata to a tree entry mode on non-Unix targets.
///
/// The metadata is not consulted here, so every regular file is recorded as `Blob`.
#[cfg(not(unix))]
fn entry_mode_from_file_metadata(_meta: &fs::Metadata) -> EntryMode {
    EntryMode::Blob
}
613
614#[cfg(test)]
615mod tests {
616 use super::*;
617 use crate::object::ObjectType;
618 use tempfile::TempDir;
619
620 fn fresh_store() -> (TempDir, ObjectStore) {
621 let dir = TempDir::new().unwrap();
622 let store = ObjectStore::init(dir.path()).unwrap();
623 (dir, store)
624 }
625
626 #[test]
627 fn validate_symlink_targets() {
628 assert!(validate_symlink_target("hello"));
629 assert!(validate_symlink_target("sub/dir/file"));
630 assert!(!validate_symlink_target(""));
631 assert!(!validate_symlink_target("/etc/passwd"));
632 assert!(!validate_symlink_target("../escape"));
633 assert!(!validate_symlink_target("a/../b"));
634 }
635
636 #[test]
637 fn build_tree_from_empty_dir() {
638 let (_sd, store) = fresh_store();
639 let work = TempDir::new().unwrap();
640 let h = build_tree(&store, work.path()).unwrap();
641 let obj = store.read_object(&h).unwrap();
642 match obj {
643 Object::Tree(t) => assert_eq!(t.entries.len(), 0),
644 other => panic!("expected tree, got {other:?}"),
645 }
646 }
647
648 #[test]
649 fn build_tree_with_single_file() {
650 let (_sd, store) = fresh_store();
651 let work = TempDir::new().unwrap();
652 fs::write(work.path().join("hello.txt"), b"hello world").unwrap();
653 let h = build_tree(&store, work.path()).unwrap();
654 let obj = store.read_object(&h).unwrap();
655 let Object::Tree(t) = obj else {
656 panic!("expected tree");
657 };
658 assert_eq!(t.entries.len(), 1);
659 assert_eq!(t.entries[0].name.as_slice(), b"hello.txt");
660 assert_eq!(t.entries[0].mode, EntryMode::Blob);
661 let blob_obj = store.read_object(&t.entries[0].object_hash).unwrap();
662 let Object::Blob(b) = blob_obj else {
663 panic!("expected blob");
664 };
665 assert_eq!(b.data, b"hello world");
666 }
667
668 #[cfg(unix)]
669 #[test]
670 fn build_tree_marks_executable_regular_files() {
671 use std::os::unix::fs::PermissionsExt;
672
673 let (_sd, store) = fresh_store();
674 let work = TempDir::new().unwrap();
675 let script = work.path().join("run.sh");
676 fs::write(&script, b"#!/bin/sh\n").unwrap();
677 let mut perms = fs::metadata(&script).unwrap().permissions();
678 perms.set_mode(perms.mode() | 0o111);
679 fs::set_permissions(&script, perms).unwrap();
680
681 let h = build_tree(&store, work.path()).unwrap();
682 let Object::Tree(t) = store.read_object(&h).unwrap() else {
683 panic!("expected tree");
684 };
685 assert_eq!(t.entries[0].name.as_slice(), b"run.sh");
686 assert_eq!(t.entries[0].mode, EntryMode::Executable);
687 }
688
689 #[cfg(unix)]
690 #[test]
691 fn build_tree_rejects_invalid_entry_name_before_writing_tree() {
692 let (_sd, store) = fresh_store();
693 let work = TempDir::new().unwrap();
694 fs::write(work.path().join("bad."), b"bad name").unwrap();
695
696 let err = build_tree(&store, work.path()).unwrap_err();
697 assert!(matches!(err, WorktreeError::Io(_)));
698 }
699
700 #[cfg(unix)]
701 #[test]
702 fn hash_file_rejects_final_component_symlink() {
703 use std::os::unix::fs::symlink;
704
705 let (_sd, store) = fresh_store();
706 let work = TempDir::new().unwrap();
707 fs::write(work.path().join("target.txt"), b"target").unwrap();
708 symlink("target.txt", work.path().join("link.txt")).unwrap();
709
710 let err = hash_file(&store, &work.path().join("link.txt")).unwrap_err();
711 assert!(matches!(err, WorktreeError::Io(_)));
712 }
713
714 #[test]
715 fn build_tree_with_nested_directories() {
716 let (_sd, store) = fresh_store();
717 let work = TempDir::new().unwrap();
718 fs::write(work.path().join("a.txt"), b"file a").unwrap();
719 fs::create_dir(work.path().join("subdir")).unwrap();
720 fs::write(work.path().join("subdir/b.txt"), b"file b").unwrap();
721 let h = build_tree(&store, work.path()).unwrap();
722 let obj = store.read_object(&h).unwrap();
723 let Object::Tree(t) = obj else {
724 panic!("expected tree");
725 };
726 assert_eq!(t.entries.len(), 2);
727 assert_eq!(t.entries[0].name.as_slice(), b"a.txt");
729 assert_eq!(t.entries[1].name.as_slice(), b"subdir");
730 assert_eq!(t.entries[1].mode, EntryMode::Tree);
731 let sub = store.read_object(&t.entries[1].object_hash).unwrap();
732 let Object::Tree(st) = sub else {
733 panic!("expected tree");
734 };
735 assert_eq!(st.entries.len(), 1);
736 assert_eq!(st.entries[0].name.as_slice(), b"b.txt");
737 }
738
739 #[test]
740 fn build_tree_skips_mkit_directory() {
741 let (_sd, store) = fresh_store();
742 let work = TempDir::new().unwrap();
743 fs::create_dir(work.path().join(".mkit")).unwrap();
744 fs::write(work.path().join(".mkit/should_skip"), b"").unwrap();
745 fs::write(work.path().join("keep.txt"), b"kept").unwrap();
746 let h = build_tree(&store, work.path()).unwrap();
747 let obj = store.read_object(&h).unwrap();
748 let Object::Tree(t) = obj else {
749 panic!("expected tree");
750 };
751 assert_eq!(t.entries.len(), 1);
752 assert_eq!(t.entries[0].name.as_slice(), b"keep.txt");
753 }
754
755 #[test]
756 fn build_tree_is_deterministic() {
757 let (_sd, store) = fresh_store();
758 let work = TempDir::new().unwrap();
759 fs::write(work.path().join("z.txt"), b"z").unwrap();
760 fs::write(work.path().join("a.txt"), b"a").unwrap();
761 let h1 = build_tree(&store, work.path()).unwrap();
762 let h2 = build_tree(&store, work.path()).unwrap();
763 assert_eq!(h1, h2);
764 }
765
766 #[test]
767 fn build_tree_respects_mkitignore() {
768 let (_sd, store) = fresh_store();
769 let work = TempDir::new().unwrap();
770 fs::write(work.path().join(".mkitignore"), b"*.log\n").unwrap();
771 fs::write(work.path().join("keep.txt"), b"kept").unwrap();
772 fs::write(work.path().join("debug.log"), b"ignored").unwrap();
773 let h = build_tree(&store, work.path()).unwrap();
774 let obj = store.read_object(&h).unwrap();
775 let Object::Tree(t) = obj else {
776 panic!("expected tree");
777 };
778 assert_eq!(t.entries.len(), 2);
780 assert_eq!(t.entries[0].name.as_slice(), b".mkitignore");
781 assert_eq!(t.entries[1].name.as_slice(), b"keep.txt");
782 }
783
784 #[cfg(unix)]
785 #[test]
786 fn rejects_invalid_symlink_targets() {
787 use std::os::unix::fs::symlink;
788 let (_sd, store) = fresh_store();
789 let work = TempDir::new().unwrap();
790 symlink("/etc/passwd", work.path().join("bad-link")).unwrap();
791 let err = build_tree(&store, work.path()).unwrap_err();
792 assert!(matches!(err, WorktreeError::InvalidSymlinkTarget(_)));
793 }
794
795 #[cfg(unix)]
796 #[test]
797 fn rejects_dotdot_symlink_targets() {
798 use std::os::unix::fs::symlink;
799 let (_sd, store) = fresh_store();
800 let work = TempDir::new().unwrap();
801 symlink("../../etc/passwd", work.path().join("bad-link")).unwrap();
802 let err = build_tree(&store, work.path()).unwrap_err();
803 assert!(matches!(err, WorktreeError::InvalidSymlinkTarget(_)));
804 }
805
806 #[test]
807 fn small_file_stays_as_regular_blob() {
808 let (_sd, store) = fresh_store();
809 let work = TempDir::new().unwrap();
810 fs::write(work.path().join("small.txt"), b"hello world").unwrap();
811 let h = build_tree(&store, work.path()).unwrap();
812 let obj = store.read_object(&h).unwrap();
813 let Object::Tree(t) = obj else {
814 panic!("expected tree");
815 };
816 let entry = store.read_object(&t.entries[0].object_hash).unwrap();
817 assert_eq!(entry.object_type(), ObjectType::Blob);
818 }
819
820 #[test]
821 fn large_file_becomes_chunked_blob() {
822 let (_sd, store) = fresh_store();
827 let work = TempDir::new().unwrap();
828 let n = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;
829 let mut big = Vec::with_capacity(n);
830 let mut state: u64 = 0x00C0_FFEE;
831 for _ in 0..n {
832 state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
834 let mut z = state;
835 z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
836 z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
837 z ^= z >> 31;
838 big.push((z & 0xFF) as u8);
839 }
840 fs::write(work.path().join("big.bin"), &big).unwrap();
841
842 let tree_hash = build_tree(&store, work.path()).unwrap();
843 let Object::Tree(t) = store.read_object(&tree_hash).unwrap() else {
844 panic!("expected tree");
845 };
846 assert_eq!(t.entries.len(), 1);
847
848 let entry_hash = t.entries[0].object_hash;
849 let entry = store.read_object(&entry_hash).unwrap();
850 let Object::ChunkedBlob(manifest) = entry else {
851 panic!("expected chunked_blob, got {entry:?}");
852 };
853
854 assert_eq!(manifest.total_size, n as u64);
855 assert_eq!(manifest.chunk_size, 0, "0 = content-defined (FastCDC)");
856 assert!(!manifest.chunks.is_empty());
857 let mut reassembled: Vec<u8> = Vec::with_capacity(n);
860 for h in &manifest.chunks {
861 let Object::Blob(b) = store.read_object(h).unwrap() else {
862 panic!("chunk did not resolve to a Blob");
863 };
864 reassembled.extend_from_slice(&b.data);
865 }
866 assert_eq!(reassembled, big, "chunks must round-trip the source");
867 }
868
869 use crate::index::{EntryStatus, Index, IndexEntry};
872
873 fn write_blob(store: &ObjectStore, bytes: &[u8]) -> Hash {
874 let blob = Object::Blob(crate::object::Blob {
875 data: bytes.to_vec(),
876 });
877 let body = serialize::serialize(&blob).unwrap();
878 store.write(&body).unwrap()
879 }
880
881 #[test]
882 fn from_index_empty_returns_empty_tree() {
883 let (_sd, store) = fresh_store();
884 let idx = Index::new();
885 let h = build_tree_from_index(&store, &idx).unwrap();
886 let Object::Tree(t) = store.read_object(&h).unwrap() else {
887 panic!("expected tree");
888 };
889 assert!(t.entries.is_empty());
890 }
891
892 #[test]
893 fn from_index_single_file_at_root() {
894 let (_sd, store) = fresh_store();
895 let blob_hash = write_blob(&store, b"hello world");
896 let mut idx = Index::new();
897 idx.entries.push(IndexEntry {
898 path: "hello.txt".into(),
899 status: EntryStatus::Blob,
900 object_hash: blob_hash,
901 });
902 let h = build_tree_from_index(&store, &idx).unwrap();
903 let Object::Tree(t) = store.read_object(&h).unwrap() else {
904 panic!();
905 };
906 assert_eq!(t.entries.len(), 1);
907 assert_eq!(t.entries[0].name, b"hello.txt");
908 assert_eq!(t.entries[0].mode, EntryMode::Blob);
909 assert_eq!(t.entries[0].object_hash, blob_hash);
910 }
911
912 #[test]
913 fn from_index_nested_paths_build_subtrees() {
914 let (_sd, store) = fresh_store();
915 let a = write_blob(&store, b"file a");
916 let b = write_blob(&store, b"file b");
917 let mut idx = Index::new();
918 idx.entries.push(IndexEntry {
919 path: "a.txt".into(),
920 status: EntryStatus::Blob,
921 object_hash: a,
922 });
923 idx.entries.push(IndexEntry {
924 path: "subdir/b.txt".into(),
925 status: EntryStatus::Blob,
926 object_hash: b,
927 });
928 let root_hash = build_tree_from_index(&store, &idx).unwrap();
929 let Object::Tree(root) = store.read_object(&root_hash).unwrap() else {
930 panic!();
931 };
932 assert_eq!(root.entries.len(), 2);
933 assert_eq!(root.entries[0].name, b"a.txt");
934 assert_eq!(root.entries[0].mode, EntryMode::Blob);
935 assert_eq!(root.entries[1].name, b"subdir");
936 assert_eq!(root.entries[1].mode, EntryMode::Tree);
937
938 let Object::Tree(sub) = store.read_object(&root.entries[1].object_hash).unwrap() else {
939 panic!();
940 };
941 assert_eq!(sub.entries.len(), 1);
942 assert_eq!(sub.entries[0].name, b"b.txt");
943 assert_eq!(sub.entries[0].object_hash, b);
944 }
945
946 #[test]
947 fn from_index_removed_entries_are_skipped() {
948 let (_sd, store) = fresh_store();
949 let a = write_blob(&store, b"keep me");
950 let mut idx = Index::new();
951 idx.entries.push(IndexEntry {
952 path: "keep.txt".into(),
953 status: EntryStatus::Blob,
954 object_hash: a,
955 });
956 idx.entries.push(IndexEntry {
957 path: "drop.txt".into(),
958 status: EntryStatus::Removed,
959 object_hash: [0; 32],
960 });
961 let h = build_tree_from_index(&store, &idx).unwrap();
962 let Object::Tree(t) = store.read_object(&h).unwrap() else {
963 panic!();
964 };
965 assert_eq!(t.entries.len(), 1);
966 assert_eq!(t.entries[0].name, b"keep.txt");
967 }
968
969 #[test]
970 fn from_index_executable_and_symlink_modes_pass_through() {
971 let (_sd, store) = fresh_store();
972 let exec = write_blob(&store, b"#!/bin/sh");
973 let link = write_blob(&store, b"target.txt");
974 let mut idx = Index::new();
975 idx.entries.push(IndexEntry {
976 path: "run.sh".into(),
977 status: EntryStatus::Executable,
978 object_hash: exec,
979 });
980 idx.entries.push(IndexEntry {
981 path: "link".into(),
982 status: EntryStatus::Symlink,
983 object_hash: link,
984 });
985 let h = build_tree_from_index(&store, &idx).unwrap();
986 let Object::Tree(t) = store.read_object(&h).unwrap() else {
987 panic!();
988 };
989 let by_name: std::collections::HashMap<&[u8], &TreeEntry> =
990 t.entries.iter().map(|e| (e.name.as_slice(), e)).collect();
991 assert_eq!(by_name[&b"run.sh"[..]].mode, EntryMode::Executable);
992 assert_eq!(by_name[&b"link"[..]].mode, EntryMode::Symlink);
993 }
994
995 #[test]
996 fn from_index_entries_are_sorted_by_name() {
997 let (_sd, store) = fresh_store();
998 let a = write_blob(&store, b"x");
999 let mut idx = Index::new();
1000 idx.entries.push(IndexEntry {
1003 path: "z.txt".into(),
1004 status: EntryStatus::Blob,
1005 object_hash: a,
1006 });
1007 idx.entries.push(IndexEntry {
1008 path: "a.txt".into(),
1009 status: EntryStatus::Blob,
1010 object_hash: a,
1011 });
1012 idx.entries.push(IndexEntry {
1013 path: "m.txt".into(),
1014 status: EntryStatus::Blob,
1015 object_hash: a,
1016 });
1017 let h = build_tree_from_index(&store, &idx).unwrap();
1018 let Object::Tree(t) = store.read_object(&h).unwrap() else {
1019 panic!();
1020 };
1021 let names: Vec<&[u8]> = t.entries.iter().map(|e| e.name.as_slice()).collect();
1022 assert_eq!(names, vec![&b"a.txt"[..], b"m.txt", b"z.txt"]);
1023 }
1024
1025 #[test]
1026 fn from_index_rejects_trailing_slash() {
1027 let (_sd, store) = fresh_store();
1028 let h = write_blob(&store, b"x");
1029 let mut idx = Index::new();
1030 idx.entries.push(IndexEntry {
1031 path: "dir/".into(),
1032 status: EntryStatus::Blob,
1033 object_hash: h,
1034 });
1035 let err = build_tree_from_index(&store, &idx).unwrap_err();
1036 assert!(matches!(err, WorktreeError::Io(_)));
1037 }
1038
1039 #[test]
1040 fn from_index_rejects_empty_segment() {
1041 let (_sd, store) = fresh_store();
1042 let h = write_blob(&store, b"x");
1043 let mut idx = Index::new();
1044 idx.entries.push(IndexEntry {
1045 path: "a//b.txt".into(),
1046 status: EntryStatus::Blob,
1047 object_hash: h,
1048 });
1049 let err = build_tree_from_index(&store, &idx).unwrap_err();
1050 assert!(matches!(err, WorktreeError::Io(_)));
1051 }
1052
1053 #[test]
1054 fn from_index_rejects_reserved_name() {
1055 let (_sd, store) = fresh_store();
1056 let h = write_blob(&store, b"x");
1057 let mut idx = Index::new();
1058 idx.entries.push(IndexEntry {
1061 path: ".mkit".into(),
1062 status: EntryStatus::Blob,
1063 object_hash: h,
1064 });
1065 let err = build_tree_from_index(&store, &idx).unwrap_err();
1066 assert!(matches!(err, WorktreeError::Io(_)));
1067 }
1068
1069 #[test]
1075 fn from_index_matches_build_tree_for_equivalent_worktree() {
1076 let (_sd, store) = fresh_store();
1077
1078 let work = TempDir::new().unwrap();
1083 fs::write(work.path().join("a.txt"), b"alpha").unwrap();
1084 fs::create_dir(work.path().join("dir")).unwrap();
1085 fs::write(work.path().join("dir/b.txt"), b"beta").unwrap();
1086 fs::write(work.path().join("dir/c.txt"), b"gamma").unwrap();
1087 let worktree_root = build_tree(&store, work.path()).unwrap();
1088
1089 let a = write_blob(&store, b"alpha");
1090 let b = write_blob(&store, b"beta");
1091 let c = write_blob(&store, b"gamma");
1092 let mut idx = Index::new();
1093 idx.entries.push(IndexEntry {
1094 path: "a.txt".into(),
1095 status: EntryStatus::Blob,
1096 object_hash: a,
1097 });
1098 idx.entries.push(IndexEntry {
1099 path: "dir/b.txt".into(),
1100 status: EntryStatus::Blob,
1101 object_hash: b,
1102 });
1103 idx.entries.push(IndexEntry {
1104 path: "dir/c.txt".into(),
1105 status: EntryStatus::Blob,
1106 object_hash: c,
1107 });
1108 let index_root = build_tree_from_index(&store, &idx).unwrap();
1109
1110 assert_eq!(
1111 worktree_root, index_root,
1112 "build_tree_from_index must produce the same root hash as build_tree for equivalent contents"
1113 );
1114 }
1115
1116 #[test]
1117 fn from_index_deeply_nested_paths_build_chain_of_subtrees() {
1118 let (_sd, store) = fresh_store();
1119 let h = write_blob(&store, b"deep");
1120 let mut idx = Index::new();
1121 idx.entries.push(IndexEntry {
1122 path: "a/b/c/d/e.txt".into(),
1123 status: EntryStatus::Blob,
1124 object_hash: h,
1125 });
1126 let root = build_tree_from_index(&store, &idx).unwrap();
1127 let Object::Tree(t) = store.read_object(&root).unwrap() else {
1128 panic!();
1129 };
1130 assert_eq!(t.entries.len(), 1);
1131 assert_eq!(t.entries[0].name, b"a");
1132 assert_eq!(t.entries[0].mode, EntryMode::Tree);
1133 let mut cursor = t.entries[0].object_hash;
1135 for seg in [b"b" as &[u8], b"c", b"d"] {
1136 let Object::Tree(t) = store.read_object(&cursor).unwrap() else {
1137 panic!();
1138 };
1139 assert_eq!(t.entries.len(), 1);
1140 assert_eq!(t.entries[0].name, seg);
1141 cursor = t.entries[0].object_hash;
1142 }
1143 let Object::Tree(t) = store.read_object(&cursor).unwrap() else {
1144 panic!();
1145 };
1146 assert_eq!(t.entries[0].name, b"e.txt");
1147 assert_eq!(t.entries[0].object_hash, h);
1148 }
1149
1150 #[test]
1158 fn from_index_rejects_blob_then_subdir_collision() {
1159 let (_sd, store) = fresh_store();
1160 let h = write_blob(&store, b"x");
1161 let mut idx = Index::new();
1162 idx.entries.push(IndexEntry {
1163 path: "a".into(),
1164 status: EntryStatus::Blob,
1165 object_hash: h,
1166 });
1167 idx.entries.push(IndexEntry {
1168 path: "a/b".into(),
1169 status: EntryStatus::Blob,
1170 object_hash: h,
1171 });
1172 let err = build_tree_from_index(&store, &idx).unwrap_err();
1173 let msg = format!("{err}");
1174 assert!(
1175 msg.contains("conflict") || msg.contains("collision") || msg.contains("'a'"),
1176 "expected collision error mentioning the path, got: {msg}"
1177 );
1178 }
1179
1180 #[test]
1183 fn from_index_rejects_subdir_then_blob_collision() {
1184 let (_sd, store) = fresh_store();
1185 let h = write_blob(&store, b"x");
1186 let mut idx = Index::new();
1187 idx.entries.push(IndexEntry {
1188 path: "a/b".into(),
1189 status: EntryStatus::Blob,
1190 object_hash: h,
1191 });
1192 idx.entries.push(IndexEntry {
1193 path: "a".into(),
1194 status: EntryStatus::Blob,
1195 object_hash: h,
1196 });
1197 assert!(build_tree_from_index(&store, &idx).is_err());
1198 }
1199
1200 #[test]
1201 fn from_index_rejects_duplicate_exact_path() {
1202 let (_sd, store) = fresh_store();
1203 let a = write_blob(&store, b"a");
1204 let b = write_blob(&store, b"b");
1205 let mut idx = Index::new();
1206 idx.entries.push(IndexEntry {
1207 path: "same.txt".into(),
1208 status: EntryStatus::Blob,
1209 object_hash: a,
1210 });
1211 idx.entries.push(IndexEntry {
1212 path: "same.txt".into(),
1213 status: EntryStatus::Blob,
1214 object_hash: b,
1215 });
1216
1217 let err = build_tree_from_index(&store, &idx).unwrap_err();
1218 let msg = format!("{err}");
1219 assert!(msg.contains("duplicate index path"), "got: {msg}");
1220 }
1221
1222 #[test]
1223 fn from_index_rejects_duplicate_removed_and_live_path() {
1224 let (_sd, store) = fresh_store();
1225 let h = write_blob(&store, b"live");
1226 let mut idx = Index::new();
1227 idx.entries.push(IndexEntry {
1228 path: "same.txt".into(),
1229 status: EntryStatus::Removed,
1230 object_hash: [0; 32],
1231 });
1232 idx.entries.push(IndexEntry {
1233 path: "same.txt".into(),
1234 status: EntryStatus::Blob,
1235 object_hash: h,
1236 });
1237
1238 let err = build_tree_from_index(&store, &idx).unwrap_err();
1239 let msg = format!("{err}");
1240 assert!(msg.contains("duplicate index path"), "got: {msg}");
1241 }
1242
1243 #[test]
1250 fn from_index_all_removed_produces_empty_tree() {
1251 let (_sd, store) = fresh_store();
1252 let mut idx = Index::new();
1253 idx.entries.push(IndexEntry {
1254 path: "gone.txt".into(),
1255 status: EntryStatus::Removed,
1256 object_hash: [0; 32],
1257 });
1258 let h = build_tree_from_index(&store, &idx).unwrap();
1259 let Object::Tree(t) = store.read_object(&h).unwrap() else {
1260 panic!();
1261 };
1262 assert!(t.entries.is_empty());
1263 }
1264
1265 #[test]
1268 fn from_index_root_is_a_tree_object() {
1269 let (_sd, store) = fresh_store();
1270 let idx = Index::new();
1271 let h = build_tree_from_index(&store, &idx).unwrap();
1272 let obj = store.read_object(&h).unwrap();
1273 assert_eq!(obj.object_type(), ObjectType::Tree);
1274 }
1275
1276 #[test]
1277 fn from_index_rejects_missing_blob_object() {
1278 let (_sd, store) = fresh_store();
1279 let mut idx = Index::new();
1280 idx.entries.push(IndexEntry {
1281 path: "missing.txt".into(),
1282 status: EntryStatus::Blob,
1283 object_hash: [42; 32],
1284 });
1285
1286 let err = build_tree_from_index(&store, &idx).unwrap_err();
1287 assert!(matches!(err, WorktreeError::Store(_)));
1288 }
1289
1290 #[test]
1291 fn from_index_rejects_non_blob_object_for_blob_status() {
1292 let (_sd, store) = fresh_store();
1293 let tree = Object::Tree(Tree { entries: vec![] });
1294 let body = serialize::serialize(&tree).unwrap();
1295 let tree_hash = store.write(&body).unwrap();
1296 let mut idx = Index::new();
1297 idx.entries.push(IndexEntry {
1298 path: "not-a-blob.txt".into(),
1299 status: EntryStatus::Blob,
1300 object_hash: tree_hash,
1301 });
1302
1303 let err = build_tree_from_index(&store, &idx).unwrap_err();
1304 let msg = format!("{err}");
1305 assert!(
1306 msg.contains("non-blob"),
1307 "expected non-blob index object error, got: {msg}"
1308 );
1309 }
1310
1311 #[test]
1318 fn from_index_accepts_chunked_blob_for_file_entry() {
1319 let (_sd, store) = fresh_store();
1320 let n = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;
1323 let mut big = Vec::with_capacity(n);
1324 let mut state: u64 = 0x00C0_FFEE;
1325 for _ in 0..n {
1326 state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
1327 let mut z = state;
1328 z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
1329 z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
1330 z ^= z >> 31;
1331 big.push((z & 0xFF) as u8);
1332 }
1333 let chunked_hash = store_file_object(&store, &big).unwrap();
1334 assert!(
1335 matches!(
1336 store.read_object(&chunked_hash).unwrap(),
1337 Object::ChunkedBlob(_)
1338 ),
1339 "fixture must be a ChunkedBlob"
1340 );
1341
1342 let mut idx = Index::new();
1343 idx.entries.push(IndexEntry {
1344 path: "big.bin".into(),
1345 status: EntryStatus::Blob,
1346 object_hash: chunked_hash,
1347 });
1348 let root = build_tree_from_index(&store, &idx).unwrap();
1349 let Object::Tree(t) = store.read_object(&root).unwrap() else {
1350 panic!("expected tree");
1351 };
1352 assert_eq!(t.entries.len(), 1);
1353 assert_eq!(t.entries[0].name, b"big.bin");
1354 assert_eq!(t.entries[0].mode, EntryMode::Blob);
1355 assert_eq!(t.entries[0].object_hash, chunked_hash);
1356 assert_eq!(read_blob(&store, &chunked_hash).unwrap(), big);
1358 }
1359
1360 #[test]
1363 fn from_index_rejects_chunked_blob_for_symlink_entry() {
1364 let (_sd, store) = fresh_store();
1365 let n = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;
1366 let big = vec![0xABu8; n];
1367 let chunked_hash = store_file_object(&store, &big).unwrap();
1368 let mut idx = Index::new();
1369 idx.entries.push(IndexEntry {
1370 path: "link".into(),
1371 status: EntryStatus::Symlink,
1372 object_hash: chunked_hash,
1373 });
1374 let err = build_tree_from_index(&store, &idx).unwrap_err();
1375 assert!(format!("{err}").contains("non-blob"));
1376 }
1377}