1use std::fs;
20use std::io::{self, Read};
21use std::path::{Path, PathBuf};
22
23use crate::chunker::{ChunkIterator, FastCdc};
24use crate::hash::Hash;
25use crate::ignore::{self, IgnoreList};
26use crate::index::{self, Index};
27use crate::object::{ChunkedBlob, EntryMode, Object, Tree, TreeEntry};
28use crate::serialize;
29use crate::store::{ObjectSink, ObjectStore};
30
/// Size in bytes (1 MiB) up to which file content is stored as one plain
/// blob; anything larger is split into chunks by `store_file_object`.
pub const CHUNK_THRESHOLD: u64 = 1 << 20;

/// Largest file size in bytes (1 GiB) that `read_regular_file_bounded`
/// accepts before failing with `WorktreeError::FileTooLarge`.
pub const MAX_FILE_BYTES: u64 = 1 << 30;
36
/// Errors reported while walking the worktree or assembling tree objects.
#[derive(Debug, thiserror::Error)]
pub enum WorktreeError {
    /// A symlink target was rejected by `validate_symlink_target`; carries
    /// the offending target string.
    #[error("symlink target '{0}' is invalid (absolute or contains '..')")]
    InvalidSymlinkTarget(String),
    /// A file was larger than `MAX_FILE_BYTES`; carries the file's path.
    #[error("file '{0}' exceeds the {MAX_FILE_BYTES} byte limit")]
    FileTooLarge(PathBuf),
    /// A file name or symlink target could not be represented as UTF-8.
    #[error("path component is not valid UTF-8")]
    InvalidUtf8,
    /// An underlying I/O failure. This variant is also used throughout the
    /// module for ad-hoc validation failures built with `io::Error::other`.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// An error propagated from the object layer (`crate::object`).
    #[error(transparent)]
    Object(#[from] crate::object::MkitError),
    /// An error propagated from the object store (`crate::store`).
    #[error(transparent)]
    Store(#[from] crate::store::StoreError),
}
59
/// Shorthand for a `Result` whose error type is `WorktreeError`.
pub type WorktreeResult<T> = Result<T, WorktreeError>;
62
/// Reports whether `target` may be stored as a symlink target.
///
/// A target is rejected when it is empty, when it begins with `/`, or when
/// any of its `/`-separated components is exactly `..`.
#[must_use]
pub fn validate_symlink_target(target: &str) -> bool {
    let is_relative = !target.is_empty() && !target.starts_with('/');
    is_relative && target.split('/').all(|component| component != "..")
}
80
/// Fresh stat data for a file whose content had to be re-hashed.
///
/// `build_tree_inner` records one of these when the stat fields of an indexed
/// file did not match, but the newly computed hash equals the hash already
/// stored in the index entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StatObservation {
    /// Path relative to the root of the walk, with `/` between components.
    pub path: String,
    /// Hash of the object holding the file's content.
    pub object_hash: Hash,
    /// Modification time, as returned by `stat_cache_fields`.
    pub mtime_ns: u64,
    /// File length in bytes, as returned by `stat_cache_fields`.
    pub size: u64,
    /// Inode number, as returned by `stat_cache_fields` (0 on non-Unix builds).
    pub ino: u64,
    /// Status-change time, as returned by `stat_cache_fields` (0 on non-Unix builds).
    pub ctime_ns: u64,
}
101
102pub fn build_tree<S: ObjectSink + ?Sized>(sink: &S, dir: &Path) -> WorktreeResult<Hash> {
115 build_tree_filtered(sink, dir, None)
116}
117
118pub fn build_tree_filtered<S: ObjectSink + ?Sized>(
127 sink: &S,
128 dir: &Path,
129 index: Option<&Index>,
130) -> WorktreeResult<Hash> {
131 build_tree_filtered_observed(sink, dir, index, &mut Vec::new())
132}
133
134pub fn build_tree_filtered_observed<S: ObjectSink + ?Sized>(
142 sink: &S,
143 dir: &Path,
144 index: Option<&Index>,
145 observations: &mut Vec<StatObservation>,
146) -> WorktreeResult<Hash> {
147 let ignores = ignore::load(dir).map_err(|e| match e {
148 crate::ignore::IgnoreError::Io(io) => WorktreeError::Io(io),
149 crate::ignore::IgnoreError::FileTooLarge => {
150 WorktreeError::Io(io::Error::other("ignore file exceeds 1 MiB"))
151 }
152 })?;
153 let loaded;
156 let index = if let Some(i) = index {
157 i
158 } else {
159 loaded = index::read_index(dir).unwrap_or_default();
160 &loaded
161 };
162 let by_path: std::collections::HashMap<&str, &crate::index::IndexEntry> =
165 index.entries.iter().map(|e| (e.path.as_str(), e)).collect();
166 build_tree_inner(
167 sink,
168 dir,
169 "",
170 &ignores,
171 index,
172 &by_path,
173 false,
174 observations,
175 )
176}
177
178#[allow(clippy::too_many_arguments)]
184fn build_tree_inner<S: ObjectSink + ?Sized>(
185 sink: &S,
186 dir: &Path,
187 rel_dir: &str,
188 ignores: &IgnoreList,
189 index: &Index,
190 by_path: &std::collections::HashMap<&str, &crate::index::IndexEntry>,
191 parent_ignored: bool,
192 observations: &mut Vec<StatObservation>,
193) -> WorktreeResult<Hash> {
194 let mut entries: Vec<TreeEntry> = Vec::new();
195
196 for entry in fs::read_dir(dir)? {
197 let entry = entry?;
198 let file_name = entry.file_name();
199 let name_str = file_name
200 .to_str()
201 .ok_or(WorktreeError::InvalidUtf8)?
202 .to_string();
203 let meta = entry.path().symlink_metadata()?;
205 let is_dir = meta.is_dir();
206 let rel_path = if rel_dir.is_empty() {
207 name_str.clone()
208 } else {
209 format!("{rel_dir}/{name_str}")
210 };
211 let entry_ignored = parent_ignored || ignores.is_ignored(&rel_path, is_dir);
216 if entry_ignored && !index.tracks_path_or_descendant(&rel_path) {
217 continue;
218 }
219
220 let name_bytes = name_str.as_bytes();
221 if !TreeEntry::validate_name(name_bytes) {
222 return Err(WorktreeError::Io(io::Error::new(
223 io::ErrorKind::InvalidInput,
224 format!("invalid tree entry name: {name_str:?}"),
225 )));
226 }
227
228 if meta.file_type().is_file() {
229 let indexed = by_path.get(rel_path.as_str()).copied();
235 let cached = indexed.filter(|e| stat_matches(e, &meta));
236 let (object_hash, mode) = if let Some(e) = cached {
237 (e.object_hash, entry_mode_from_file_metadata(&meta))
238 } else {
239 let (h, opened_meta) = hash_file_with_metadata(sink, &entry.path())?;
240 if let Some(e) = indexed
245 && e.object_hash == h
246 {
247 let (mtime_ns, size, ino, ctime_ns) = stat_cache_fields(&opened_meta);
248 observations.push(StatObservation {
249 path: rel_path.clone(),
250 object_hash: h,
251 mtime_ns,
252 size,
253 ino,
254 ctime_ns,
255 });
256 }
257 (h, entry_mode_from_file_metadata(&opened_meta))
258 };
259 entries.push(TreeEntry {
260 name: name_str.into_bytes(),
261 mode,
262 object_hash,
263 });
264 } else if meta.file_type().is_dir() {
265 if index.has_tracked_file_at(&rel_path) {
271 continue;
272 }
273 let h = build_tree_inner(
274 sink,
275 &entry.path(),
276 &rel_path,
277 ignores,
278 index,
279 by_path,
280 entry_ignored,
281 observations,
282 )?;
283 entries.push(TreeEntry {
284 name: name_str.into_bytes(),
285 mode: EntryMode::Tree,
286 object_hash: h,
287 });
288 } else if meta.file_type().is_symlink() {
289 let target = fs::read_link(entry.path())?;
290 let target_str = target
291 .to_str()
292 .ok_or(WorktreeError::InvalidUtf8)?
293 .to_string();
294 if !validate_symlink_target(&target_str) {
295 return Err(WorktreeError::InvalidSymlinkTarget(target_str));
296 }
297 let target_bytes = target_str.as_bytes();
298 let prologue = serialize::blob_prologue(target_bytes.len())?;
299 let h = sink.put_parts(&[&prologue, target_bytes])?;
300 entries.push(TreeEntry {
301 name: name_str.into_bytes(),
302 mode: EntryMode::Symlink,
303 object_hash: h,
304 });
305 } else {
306 }
308 }
309
310 entries.sort_by(|a, b| a.name.cmp(&b.name));
311 let tree = Object::Tree(Tree { entries });
312 let bytes = serialize::serialize(&tree)?;
313 Ok(sink.put(&bytes)?)
314}
315
316pub fn build_tree_from_index(
337 store: &ObjectStore,
338 index: &crate::index::Index,
339) -> WorktreeResult<Hash> {
340 build_tree_from_index_with(store, store, index, true)
343}
344
345#[allow(clippy::items_after_statements, clippy::too_many_lines)]
362pub fn build_tree_from_index_with<S: ObjectSink + ?Sized>(
363 store: &ObjectStore,
364 sink: &S,
365 index: &crate::index::Index,
366 verify: bool,
367) -> WorktreeResult<Hash> {
368 use crate::index::EntryStatus;
369
370 #[derive(Default)]
373 struct Node {
374 children: std::collections::BTreeMap<String, Node>,
376 leaves: std::collections::BTreeMap<String, (EntryMode, Hash)>,
378 }
379
380 let mut root = Node::default();
381 let mut seen_paths = std::collections::HashSet::with_capacity(index.entries.len());
382
383 for entry in &index.entries {
384 if !seen_paths.insert(entry.path.as_str()) {
385 return Err(WorktreeError::Io(io::Error::other(format!(
386 "duplicate index path: '{}'",
387 entry.path
388 ))));
389 }
390 if entry.status == EntryStatus::Removed {
391 continue;
392 }
393 let mode = match entry.status {
394 EntryStatus::Blob => EntryMode::Blob,
395 EntryStatus::Executable => EntryMode::Executable,
396 EntryStatus::Symlink => EntryMode::Symlink,
397 EntryStatus::Tree => {
398 return Err(WorktreeError::Io(io::Error::other(
402 "index entry uses reserved Tree status (subtree staging not implemented)",
403 )));
404 }
405 EntryStatus::Removed => unreachable!("filtered above"),
406 };
407 let object_type = if verify {
421 store.verify_object_type(&entry.object_hash)?
422 } else {
423 store.object_type(&entry.object_hash)?
424 };
425 match object_type {
426 crate::object::ObjectType::Blob => {}
427 crate::object::ObjectType::ChunkedBlob if mode != EntryMode::Symlink => {}
428 other => {
429 return Err(WorktreeError::Io(io::Error::other(format!(
430 "index entry '{}' points to a non-blob object (got {})",
431 entry.path,
432 other.name()
433 ))));
434 }
435 }
436
437 let segments: Vec<&str> = entry.path.split('/').collect();
439 let Some((leaf, dirs)) = segments.split_last() else {
440 return Err(WorktreeError::Io(io::Error::other("empty index path")));
441 };
442 if leaf.is_empty() {
443 return Err(WorktreeError::Io(io::Error::other(
444 "trailing slash in index path",
445 )));
446 }
447
448 let mut node = &mut root;
449 let mut walked = String::new();
450 for seg in dirs {
451 if seg.is_empty() {
452 return Err(WorktreeError::Io(io::Error::other(
453 "empty path segment in index",
454 )));
455 }
456 if node.leaves.contains_key(*seg) {
463 let conflicting = if walked.is_empty() {
464 (*seg).to_string()
465 } else {
466 format!("{walked}/{seg}")
467 };
468 return Err(WorktreeError::Io(io::Error::other(format!(
469 "index path conflict: '{conflicting}' is staged as both a file and a directory"
470 ))));
471 }
472 walked = if walked.is_empty() {
473 (*seg).to_string()
474 } else {
475 format!("{walked}/{seg}")
476 };
477 node = node.children.entry((*seg).to_string()).or_default();
478 }
479 if node.children.contains_key(*leaf) {
483 let conflicting = if walked.is_empty() {
484 (*leaf).to_string()
485 } else {
486 format!("{walked}/{leaf}")
487 };
488 return Err(WorktreeError::Io(io::Error::other(format!(
489 "index path conflict: '{conflicting}' is staged as both a file and a directory"
490 ))));
491 }
492 if node
493 .leaves
494 .insert((*leaf).to_string(), (mode, entry.object_hash))
495 .is_some()
496 {
497 let duplicate = if walked.is_empty() {
498 (*leaf).to_string()
499 } else {
500 format!("{walked}/{leaf}")
501 };
502 return Err(WorktreeError::Io(io::Error::other(format!(
503 "duplicate index path: '{duplicate}'"
504 ))));
505 }
506 }
507
508 fn write_node<S: ObjectSink + ?Sized>(sink: &S, node: &Node) -> WorktreeResult<Hash> {
509 let mut entries: Vec<TreeEntry> = Vec::new();
510
511 for (name, child) in &node.children {
513 let h = write_node(sink, child)?;
514 let bytes = name.as_bytes().to_vec();
515 if !crate::object::TreeEntry::validate_name(&bytes) {
516 return Err(WorktreeError::Io(io::Error::other(format!(
517 "invalid tree entry name: {name:?}"
518 ))));
519 }
520 entries.push(TreeEntry {
521 name: bytes,
522 mode: EntryMode::Tree,
523 object_hash: h,
524 });
525 }
526
527 for (name, (mode, hash)) in &node.leaves {
529 let bytes = name.as_bytes().to_vec();
530 if !crate::object::TreeEntry::validate_name(&bytes) {
531 return Err(WorktreeError::Io(io::Error::other(format!(
532 "invalid tree entry name: {name:?}"
533 ))));
534 }
535 entries.push(TreeEntry {
536 name: bytes,
537 mode: *mode,
538 object_hash: *hash,
539 });
540 }
541
542 entries.sort_by(|a, b| a.name.cmp(&b.name));
544 let tree = Object::Tree(Tree { entries });
545 let bytes = serialize::serialize(&tree)?;
546 Ok(sink.put(&bytes)?)
547 }
548
549 write_node(sink, &root)
550}
551
552pub fn hash_file<S: ObjectSink + ?Sized>(sink: &S, path: &Path) -> WorktreeResult<Hash> {
565 hash_file_with_metadata(sink, path).map(|(hash, _)| hash)
566}
567
568pub fn read_regular_file_bounded(path: &Path) -> WorktreeResult<(fs::Metadata, Vec<u8>)> {
572 let mut file = open_regular_file(path)?;
573 let meta = file.metadata()?;
574 if !meta.file_type().is_file() {
575 return Err(WorktreeError::Io(io::Error::new(
576 io::ErrorKind::InvalidInput,
577 "path is not a regular file",
578 )));
579 }
580 if meta.len() > MAX_FILE_BYTES {
581 return Err(WorktreeError::FileTooLarge(path.to_path_buf()));
582 }
583 let initial_capacity = usize::try_from(meta.len().min(CHUNK_THRESHOLD))
584 .map_err(|_| WorktreeError::FileTooLarge(path.to_path_buf()))?;
585 let mut data = Vec::with_capacity(initial_capacity);
586 file.by_ref()
587 .take(MAX_FILE_BYTES + 1)
588 .read_to_end(&mut data)?;
589 if u64::try_from(data.len()).unwrap_or(u64::MAX) > MAX_FILE_BYTES {
590 return Err(WorktreeError::FileTooLarge(path.to_path_buf()));
591 }
592 Ok((meta, data))
593}
594
595fn hash_file_with_metadata<S: ObjectSink + ?Sized>(
596 sink: &S,
597 path: &Path,
598) -> WorktreeResult<(Hash, fs::Metadata)> {
599 let (meta, data) = read_regular_file_bounded(path)?;
600 let hash = store_file_object(sink, &data)?;
601 Ok((hash, meta))
602}
603
604pub fn store_file_object<S: ObjectSink + ?Sized>(sink: &S, data: &[u8]) -> WorktreeResult<Hash> {
619 if u64::try_from(data.len()).unwrap_or(u64::MAX) <= CHUNK_THRESHOLD {
620 let prologue = serialize::blob_prologue(data.len())?;
624 return Ok(sink.put_parts(&[&prologue, data])?);
625 }
626
627 let total_size = data.len() as u64;
633 let chunks: Vec<Hash> = ChunkIterator::new(FastCdc::v1(), data)
634 .map(|b| {
635 let chunk = &data[b.offset..b.offset + b.length];
636 let prologue = serialize::blob_prologue(chunk.len())?;
637 Ok::<_, WorktreeError>(sink.put_parts(&[&prologue, chunk])?)
638 })
639 .collect::<Result<_, _>>()?;
640
641 let manifest = Object::ChunkedBlob(ChunkedBlob {
642 total_size,
643 chunk_size: 0, chunks,
645 });
646 let manifest_bytes = serialize::serialize(&manifest)?;
647 Ok(sink.put(&manifest_bytes)?)
648}
649
650pub fn hash_file_object(data: &[u8]) -> WorktreeResult<Hash> {
661 let hash_parts = |parts: &[&[u8]]| {
662 let mut hasher = crate::hash::Hasher::new();
663 for p in parts {
664 hasher.update(p);
665 }
666 hasher.finalize()
667 };
668 if u64::try_from(data.len()).unwrap_or(u64::MAX) <= CHUNK_THRESHOLD {
669 let prologue = serialize::blob_prologue(data.len())?;
670 return Ok(hash_parts(&[&prologue, data]));
671 }
672 let total_size = data.len() as u64;
673 let chunks: Vec<Hash> = ChunkIterator::new(FastCdc::v1(), data)
674 .map(|b| {
675 let chunk = &data[b.offset..b.offset + b.length];
676 let prologue = serialize::blob_prologue(chunk.len())?;
677 Ok::<_, WorktreeError>(hash_parts(&[&prologue, chunk]))
678 })
679 .collect::<Result<_, _>>()?;
680 let manifest = Object::ChunkedBlob(ChunkedBlob {
681 total_size,
682 chunk_size: 0,
683 chunks,
684 });
685 Ok(crate::hash::hash(&serialize::serialize(&manifest)?))
686}
687
/// Returns the modification time of `meta` in nanoseconds since the Unix epoch.
///
/// Yields 0 when the modification time is unavailable or lies before the
/// epoch, and `u64::MAX` when the nanosecond count does not fit in a `u64`.
#[must_use]
pub fn mtime_nanos(meta: &fs::Metadata) -> u64 {
    let Ok(modified) = meta.modified() else {
        return 0;
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => u64::try_from(since_epoch.as_nanos()).unwrap_or(u64::MAX),
        Err(_) => 0,
    }
}
698
699#[must_use]
705pub fn stat_cache_fields(meta: &fs::Metadata) -> (u64, u64, u64, u64) {
706 #[cfg(unix)]
707 let (ino, ctime_ns) = {
708 use std::os::unix::fs::MetadataExt;
709 let ctime_ns = u64::try_from(meta.ctime())
710 .ok()
711 .and_then(|s| s.checked_mul(1_000_000_000))
712 .and_then(|ns| ns.checked_add(u64::try_from(meta.ctime_nsec()).unwrap_or(0)))
713 .unwrap_or(0);
714 (meta.ino(), ctime_ns)
715 };
716 #[cfg(not(unix))]
717 let (ino, ctime_ns) = (0u64, 0u64);
718 (mtime_nanos(meta), meta.len(), ino, ctime_ns)
719}
720
721#[must_use]
730pub fn stat_matches(entry: &crate::index::IndexEntry, meta: &fs::Metadata) -> bool {
731 use crate::index::EntryStatus;
732 if entry.mtime_ns == 0 || !meta.is_file() {
733 return false;
734 }
735 let (mtime_ns, size, ino, ctime_ns) = stat_cache_fields(meta);
736 if size != entry.size || mtime_ns != entry.mtime_ns {
737 return false;
738 }
739 if entry.ino != 0 && ino != 0 && ino != entry.ino {
742 return false;
743 }
744 if entry.ctime_ns != 0 && ctime_ns != 0 && ctime_ns != entry.ctime_ns {
745 return false;
746 }
747 match entry.status {
748 #[cfg(not(unix))]
753 EntryStatus::Blob | EntryStatus::Executable => true,
754 #[cfg(unix)]
755 EntryStatus::Blob => entry_mode_from_file_metadata(meta) == EntryMode::Blob,
756 #[cfg(unix)]
757 EntryStatus::Executable => entry_mode_from_file_metadata(meta) == EntryMode::Executable,
758 EntryStatus::Symlink | EntryStatus::Removed | EntryStatus::Tree => false,
759 }
760}
761
762pub fn read_blob<S: crate::store::ObjectSource + ?Sized>(
777 store: &S,
778 hash: &Hash,
779) -> WorktreeResult<Vec<u8>> {
780 match store.read_object(hash)? {
781 Object::Blob(b) => Ok(b.data),
782 Object::ChunkedBlob(manifest) => {
783 let mut data = Vec::with_capacity(usize::try_from(manifest.total_size).unwrap_or(0));
784 for chunk in &manifest.chunks {
785 match store.read_object(chunk)? {
786 Object::Blob(b) => data.extend_from_slice(&b.data),
787 other => {
788 return Err(WorktreeError::Io(io::Error::other(format!(
789 "chunk {} is not a blob (got {})",
790 crate::hash::to_hex(chunk),
791 other.object_type().name()
792 ))));
793 }
794 }
795 }
796 Ok(data)
797 }
798 other => Err(WorktreeError::Io(io::Error::other(format!(
799 "object {} is not a blob (got {})",
800 crate::hash::to_hex(hash),
801 other.object_type().name()
802 )))),
803 }
804}
805
806#[cfg(unix)]
807fn open_regular_file(path: &Path) -> io::Result<fs::File> {
808 use std::os::unix::fs::OpenOptionsExt;
809
810 fs::OpenOptions::new()
811 .read(true)
812 .custom_flags(libc::O_NOFOLLOW)
813 .open(path)
814}
815
/// Opens `path` for reading after checking, without following links, that it
/// is a regular file.
///
/// The check and the open are two separate steps on this platform.
#[cfg(not(unix))]
fn open_regular_file(path: &Path) -> io::Result<fs::File> {
    let is_regular = path.symlink_metadata()?.file_type().is_file();
    if is_regular {
        fs::File::open(path)
    } else {
        Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "path is not a regular file",
        ))
    }
}
830
831#[cfg(unix)]
832fn entry_mode_from_file_metadata(meta: &fs::Metadata) -> EntryMode {
833 use std::os::unix::fs::PermissionsExt;
834
835 if meta.permissions().mode() & 0o111 != 0 {
836 EntryMode::Executable
837 } else {
838 EntryMode::Blob
839 }
840}
841
/// Non-Unix variant: the metadata is not inspected and every regular file is
/// reported as `EntryMode::Blob`.
#[cfg(not(unix))]
fn entry_mode_from_file_metadata(_meta: &fs::Metadata) -> EntryMode {
    EntryMode::Blob
}
846
847#[cfg(test)]
848mod tests {
849 use super::*;
850 use crate::object::ObjectType;
851 use tempfile::TempDir;
852
853 fn fresh_store() -> (TempDir, ObjectStore) {
854 let dir = TempDir::new().unwrap();
855 let store = ObjectStore::init(dir.path()).unwrap();
856 (dir, store)
857 }
858
859 #[test]
860 fn validate_symlink_targets() {
861 assert!(validate_symlink_target("hello"));
862 assert!(validate_symlink_target("sub/dir/file"));
863 assert!(!validate_symlink_target(""));
864 assert!(!validate_symlink_target("/etc/passwd"));
865 assert!(!validate_symlink_target("../escape"));
866 assert!(!validate_symlink_target("a/../b"));
867 }
868
869 #[test]
870 fn build_tree_from_empty_dir() {
871 let (_sd, store) = fresh_store();
872 let work = TempDir::new().unwrap();
873 let h = build_tree(&store, work.path()).unwrap();
874 let obj = store.read_object(&h).unwrap();
875 match obj {
876 Object::Tree(t) => assert_eq!(t.entries.len(), 0),
877 other => panic!("expected tree, got {other:?}"),
878 }
879 }
880
881 #[test]
882 fn build_tree_with_single_file() {
883 let (_sd, store) = fresh_store();
884 let work = TempDir::new().unwrap();
885 fs::write(work.path().join("hello.txt"), b"hello world").unwrap();
886 let h = build_tree(&store, work.path()).unwrap();
887 let obj = store.read_object(&h).unwrap();
888 let Object::Tree(t) = obj else {
889 panic!("expected tree");
890 };
891 assert_eq!(t.entries.len(), 1);
892 assert_eq!(t.entries[0].name.as_slice(), b"hello.txt");
893 assert_eq!(t.entries[0].mode, EntryMode::Blob);
894 let blob_obj = store.read_object(&t.entries[0].object_hash).unwrap();
895 let Object::Blob(b) = blob_obj else {
896 panic!("expected blob");
897 };
898 assert_eq!(b.data, b"hello world");
899 }
900
901 #[cfg(unix)]
902 #[test]
903 fn build_tree_marks_executable_regular_files() {
904 use std::os::unix::fs::PermissionsExt;
905
906 let (_sd, store) = fresh_store();
907 let work = TempDir::new().unwrap();
908 let script = work.path().join("run.sh");
909 fs::write(&script, b"#!/bin/sh\n").unwrap();
910 let mut perms = fs::metadata(&script).unwrap().permissions();
911 perms.set_mode(perms.mode() | 0o111);
912 fs::set_permissions(&script, perms).unwrap();
913
914 let h = build_tree(&store, work.path()).unwrap();
915 let Object::Tree(t) = store.read_object(&h).unwrap() else {
916 panic!("expected tree");
917 };
918 assert_eq!(t.entries[0].name.as_slice(), b"run.sh");
919 assert_eq!(t.entries[0].mode, EntryMode::Executable);
920 }
921
922 #[cfg(unix)]
923 #[test]
924 fn build_tree_rejects_invalid_entry_name_before_writing_tree() {
925 let (_sd, store) = fresh_store();
926 let work = TempDir::new().unwrap();
927 fs::write(work.path().join("bad."), b"bad name").unwrap();
928
929 let err = build_tree(&store, work.path()).unwrap_err();
930 assert!(matches!(err, WorktreeError::Io(_)));
931 }
932
933 #[cfg(unix)]
934 #[test]
935 fn hash_file_rejects_final_component_symlink() {
936 use std::os::unix::fs::symlink;
937
938 let (_sd, store) = fresh_store();
939 let work = TempDir::new().unwrap();
940 fs::write(work.path().join("target.txt"), b"target").unwrap();
941 symlink("target.txt", work.path().join("link.txt")).unwrap();
942
943 let err = hash_file(&store, &work.path().join("link.txt")).unwrap_err();
944 assert!(matches!(err, WorktreeError::Io(_)));
945 }
946
947 #[test]
948 fn build_tree_with_nested_directories() {
949 let (_sd, store) = fresh_store();
950 let work = TempDir::new().unwrap();
951 fs::write(work.path().join("a.txt"), b"file a").unwrap();
952 fs::create_dir(work.path().join("subdir")).unwrap();
953 fs::write(work.path().join("subdir/b.txt"), b"file b").unwrap();
954 let h = build_tree(&store, work.path()).unwrap();
955 let obj = store.read_object(&h).unwrap();
956 let Object::Tree(t) = obj else {
957 panic!("expected tree");
958 };
959 assert_eq!(t.entries.len(), 2);
960 assert_eq!(t.entries[0].name.as_slice(), b"a.txt");
962 assert_eq!(t.entries[1].name.as_slice(), b"subdir");
963 assert_eq!(t.entries[1].mode, EntryMode::Tree);
964 let sub = store.read_object(&t.entries[1].object_hash).unwrap();
965 let Object::Tree(st) = sub else {
966 panic!("expected tree");
967 };
968 assert_eq!(st.entries.len(), 1);
969 assert_eq!(st.entries[0].name.as_slice(), b"b.txt");
970 }
971
972 #[test]
973 fn build_tree_skips_mkit_directory() {
974 let (_sd, store) = fresh_store();
975 let work = TempDir::new().unwrap();
976 fs::create_dir(work.path().join(".mkit")).unwrap();
977 fs::write(work.path().join(".mkit/should_skip"), b"").unwrap();
978 fs::write(work.path().join("keep.txt"), b"kept").unwrap();
979 let h = build_tree(&store, work.path()).unwrap();
980 let obj = store.read_object(&h).unwrap();
981 let Object::Tree(t) = obj else {
982 panic!("expected tree");
983 };
984 assert_eq!(t.entries.len(), 1);
985 assert_eq!(t.entries[0].name.as_slice(), b"keep.txt");
986 }
987
988 #[test]
989 fn build_tree_is_deterministic() {
990 let (_sd, store) = fresh_store();
991 let work = TempDir::new().unwrap();
992 fs::write(work.path().join("z.txt"), b"z").unwrap();
993 fs::write(work.path().join("a.txt"), b"a").unwrap();
994 let h1 = build_tree(&store, work.path()).unwrap();
995 let h2 = build_tree(&store, work.path()).unwrap();
996 assert_eq!(h1, h2);
997 }
998
999 #[test]
1000 fn build_tree_respects_mkitignore() {
1001 let (_sd, store) = fresh_store();
1002 let work = TempDir::new().unwrap();
1003 fs::write(work.path().join(".mkitignore"), b"*.log\n").unwrap();
1004 fs::write(work.path().join("keep.txt"), b"kept").unwrap();
1005 fs::write(work.path().join("debug.log"), b"ignored").unwrap();
1006 let h = build_tree(&store, work.path()).unwrap();
1007 let obj = store.read_object(&h).unwrap();
1008 let Object::Tree(t) = obj else {
1009 panic!("expected tree");
1010 };
1011 assert_eq!(t.entries.len(), 2);
1013 assert_eq!(t.entries[0].name.as_slice(), b".mkitignore");
1014 assert_eq!(t.entries[1].name.as_slice(), b"keep.txt");
1015 }
1016
1017 #[cfg(unix)]
1018 #[test]
1019 fn rejects_invalid_symlink_targets() {
1020 use std::os::unix::fs::symlink;
1021 let (_sd, store) = fresh_store();
1022 let work = TempDir::new().unwrap();
1023 symlink("/etc/passwd", work.path().join("bad-link")).unwrap();
1024 let err = build_tree(&store, work.path()).unwrap_err();
1025 assert!(matches!(err, WorktreeError::InvalidSymlinkTarget(_)));
1026 }
1027
1028 #[cfg(unix)]
1029 #[test]
1030 fn rejects_dotdot_symlink_targets() {
1031 use std::os::unix::fs::symlink;
1032 let (_sd, store) = fresh_store();
1033 let work = TempDir::new().unwrap();
1034 symlink("../../etc/passwd", work.path().join("bad-link")).unwrap();
1035 let err = build_tree(&store, work.path()).unwrap_err();
1036 assert!(matches!(err, WorktreeError::InvalidSymlinkTarget(_)));
1037 }
1038
1039 #[test]
1040 fn small_file_stays_as_regular_blob() {
1041 let (_sd, store) = fresh_store();
1042 let work = TempDir::new().unwrap();
1043 fs::write(work.path().join("small.txt"), b"hello world").unwrap();
1044 let h = build_tree(&store, work.path()).unwrap();
1045 let obj = store.read_object(&h).unwrap();
1046 let Object::Tree(t) = obj else {
1047 panic!("expected tree");
1048 };
1049 let entry = store.read_object(&t.entries[0].object_hash).unwrap();
1050 assert_eq!(entry.object_type(), ObjectType::Blob);
1051 }
1052
1053 #[test]
1054 fn large_file_becomes_chunked_blob() {
1055 let (_sd, store) = fresh_store();
1060 let work = TempDir::new().unwrap();
1061 let n = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;
1062 let mut big = Vec::with_capacity(n);
1063 let mut state: u64 = 0x00C0_FFEE;
1064 for _ in 0..n {
1065 state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
1067 let mut z = state;
1068 z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
1069 z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
1070 z ^= z >> 31;
1071 big.push((z & 0xFF) as u8);
1072 }
1073 fs::write(work.path().join("big.bin"), &big).unwrap();
1074
1075 let tree_hash = build_tree(&store, work.path()).unwrap();
1076 let Object::Tree(t) = store.read_object(&tree_hash).unwrap() else {
1077 panic!("expected tree");
1078 };
1079 assert_eq!(t.entries.len(), 1);
1080
1081 let entry_hash = t.entries[0].object_hash;
1082 let entry = store.read_object(&entry_hash).unwrap();
1083 let Object::ChunkedBlob(manifest) = entry else {
1084 panic!("expected chunked_blob, got {entry:?}");
1085 };
1086
1087 assert_eq!(manifest.total_size, n as u64);
1088 assert_eq!(manifest.chunk_size, 0, "0 = content-defined (FastCDC)");
1089 assert!(!manifest.chunks.is_empty());
1090 let mut reassembled: Vec<u8> = Vec::with_capacity(n);
1093 for h in &manifest.chunks {
1094 let Object::Blob(b) = store.read_object(h).unwrap() else {
1095 panic!("chunk did not resolve to a Blob");
1096 };
1097 reassembled.extend_from_slice(&b.data);
1098 }
1099 assert_eq!(reassembled, big, "chunks must round-trip the source");
1100 }
1101
1102 use crate::index::{EntryStatus, Index, IndexEntry};
1105
1106 fn write_blob(store: &ObjectStore, bytes: &[u8]) -> Hash {
1107 let blob = Object::Blob(crate::object::Blob {
1108 data: bytes.to_vec(),
1109 });
1110 let body = serialize::serialize(&blob).unwrap();
1111 store.write(&body).unwrap()
1112 }
1113
1114 #[test]
1115 fn from_index_empty_returns_empty_tree() {
1116 let (_sd, store) = fresh_store();
1117 let idx = Index::new();
1118 let h = build_tree_from_index(&store, &idx).unwrap();
1119 let Object::Tree(t) = store.read_object(&h).unwrap() else {
1120 panic!("expected tree");
1121 };
1122 assert!(t.entries.is_empty());
1123 }
1124
1125 #[test]
1126 fn from_index_single_file_at_root() {
1127 let (_sd, store) = fresh_store();
1128 let blob_hash = write_blob(&store, b"hello world");
1129 let mut idx = Index::new();
1130 idx.entries.push(IndexEntry {
1131 path: "hello.txt".into(),
1132 status: EntryStatus::Blob,
1133 object_hash: blob_hash,
1134 mtime_ns: 0,
1135 size: 0,
1136 ino: 0,
1137 ctime_ns: 0,
1138 });
1139 let h = build_tree_from_index(&store, &idx).unwrap();
1140 let Object::Tree(t) = store.read_object(&h).unwrap() else {
1141 panic!();
1142 };
1143 assert_eq!(t.entries.len(), 1);
1144 assert_eq!(t.entries[0].name, b"hello.txt");
1145 assert_eq!(t.entries[0].mode, EntryMode::Blob);
1146 assert_eq!(t.entries[0].object_hash, blob_hash);
1147 }
1148
1149 #[test]
1150 fn from_index_nested_paths_build_subtrees() {
1151 let (_sd, store) = fresh_store();
1152 let a = write_blob(&store, b"file a");
1153 let b = write_blob(&store, b"file b");
1154 let mut idx = Index::new();
1155 idx.entries.push(IndexEntry {
1156 path: "a.txt".into(),
1157 status: EntryStatus::Blob,
1158 object_hash: a,
1159 mtime_ns: 0,
1160 size: 0,
1161 ino: 0,
1162 ctime_ns: 0,
1163 });
1164 idx.entries.push(IndexEntry {
1165 path: "subdir/b.txt".into(),
1166 status: EntryStatus::Blob,
1167 object_hash: b,
1168 mtime_ns: 0,
1169 size: 0,
1170 ino: 0,
1171 ctime_ns: 0,
1172 });
1173 let root_hash = build_tree_from_index(&store, &idx).unwrap();
1174 let Object::Tree(root) = store.read_object(&root_hash).unwrap() else {
1175 panic!();
1176 };
1177 assert_eq!(root.entries.len(), 2);
1178 assert_eq!(root.entries[0].name, b"a.txt");
1179 assert_eq!(root.entries[0].mode, EntryMode::Blob);
1180 assert_eq!(root.entries[1].name, b"subdir");
1181 assert_eq!(root.entries[1].mode, EntryMode::Tree);
1182
1183 let Object::Tree(sub) = store.read_object(&root.entries[1].object_hash).unwrap() else {
1184 panic!();
1185 };
1186 assert_eq!(sub.entries.len(), 1);
1187 assert_eq!(sub.entries[0].name, b"b.txt");
1188 assert_eq!(sub.entries[0].object_hash, b);
1189 }
1190
1191 #[test]
1192 fn from_index_removed_entries_are_skipped() {
1193 let (_sd, store) = fresh_store();
1194 let a = write_blob(&store, b"keep me");
1195 let mut idx = Index::new();
1196 idx.entries.push(IndexEntry {
1197 path: "keep.txt".into(),
1198 status: EntryStatus::Blob,
1199 object_hash: a,
1200 mtime_ns: 0,
1201 size: 0,
1202 ino: 0,
1203 ctime_ns: 0,
1204 });
1205 idx.entries.push(IndexEntry {
1206 path: "drop.txt".into(),
1207 status: EntryStatus::Removed,
1208 object_hash: [0; 32],
1209 mtime_ns: 0,
1210 size: 0,
1211 ino: 0,
1212 ctime_ns: 0,
1213 });
1214 let h = build_tree_from_index(&store, &idx).unwrap();
1215 let Object::Tree(t) = store.read_object(&h).unwrap() else {
1216 panic!();
1217 };
1218 assert_eq!(t.entries.len(), 1);
1219 assert_eq!(t.entries[0].name, b"keep.txt");
1220 }
1221
1222 #[test]
1223 fn from_index_executable_and_symlink_modes_pass_through() {
1224 let (_sd, store) = fresh_store();
1225 let exec = write_blob(&store, b"#!/bin/sh");
1226 let link = write_blob(&store, b"target.txt");
1227 let mut idx = Index::new();
1228 idx.entries.push(IndexEntry {
1229 path: "run.sh".into(),
1230 status: EntryStatus::Executable,
1231 object_hash: exec,
1232 mtime_ns: 0,
1233 size: 0,
1234 ino: 0,
1235 ctime_ns: 0,
1236 });
1237 idx.entries.push(IndexEntry {
1238 path: "link".into(),
1239 status: EntryStatus::Symlink,
1240 object_hash: link,
1241 mtime_ns: 0,
1242 size: 0,
1243 ino: 0,
1244 ctime_ns: 0,
1245 });
1246 let h = build_tree_from_index(&store, &idx).unwrap();
1247 let Object::Tree(t) = store.read_object(&h).unwrap() else {
1248 panic!();
1249 };
1250 let by_name: std::collections::HashMap<&[u8], &TreeEntry> =
1251 t.entries.iter().map(|e| (e.name.as_slice(), e)).collect();
1252 assert_eq!(by_name[&b"run.sh"[..]].mode, EntryMode::Executable);
1253 assert_eq!(by_name[&b"link"[..]].mode, EntryMode::Symlink);
1254 }
1255
1256 #[test]
1257 fn from_index_entries_are_sorted_by_name() {
1258 let (_sd, store) = fresh_store();
1259 let a = write_blob(&store, b"x");
1260 let mut idx = Index::new();
1261 idx.entries.push(IndexEntry {
1264 path: "z.txt".into(),
1265 status: EntryStatus::Blob,
1266 object_hash: a,
1267 mtime_ns: 0,
1268 size: 0,
1269 ino: 0,
1270 ctime_ns: 0,
1271 });
1272 idx.entries.push(IndexEntry {
1273 path: "a.txt".into(),
1274 status: EntryStatus::Blob,
1275 object_hash: a,
1276 mtime_ns: 0,
1277 size: 0,
1278 ino: 0,
1279 ctime_ns: 0,
1280 });
1281 idx.entries.push(IndexEntry {
1282 path: "m.txt".into(),
1283 status: EntryStatus::Blob,
1284 object_hash: a,
1285 mtime_ns: 0,
1286 size: 0,
1287 ino: 0,
1288 ctime_ns: 0,
1289 });
1290 let h = build_tree_from_index(&store, &idx).unwrap();
1291 let Object::Tree(t) = store.read_object(&h).unwrap() else {
1292 panic!();
1293 };
1294 let names: Vec<&[u8]> = t.entries.iter().map(|e| e.name.as_slice()).collect();
1295 assert_eq!(names, vec![&b"a.txt"[..], b"m.txt", b"z.txt"]);
1296 }
1297
#[test]
// A staged path ending in '/' must make `build_tree_from_index` fail with
// the `Io` error variant.
fn from_index_rejects_trailing_slash() {
    let (_store_dir, store) = fresh_store();
    let blob = write_blob(&store, b"x");
    let offending = IndexEntry {
        path: "dir/".into(),
        status: EntryStatus::Blob,
        object_hash: blob,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    };
    let mut index = Index::new();
    index.entries.push(offending);
    let failure = build_tree_from_index(&store, &index).unwrap_err();
    assert!(matches!(failure, WorktreeError::Io(_)));
}
1315
#[test]
// A staged path with an empty component ("a//b.txt") must make
// `build_tree_from_index` fail with the `Io` error variant.
fn from_index_rejects_empty_segment() {
    let (_store_dir, store) = fresh_store();
    let blob = write_blob(&store, b"x");
    let offending = IndexEntry {
        path: "a//b.txt".into(),
        status: EntryStatus::Blob,
        object_hash: blob,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    };
    let mut index = Index::new();
    index.entries.push(offending);
    let failure = build_tree_from_index(&store, &index).unwrap_err();
    assert!(matches!(failure, WorktreeError::Io(_)));
}
1333
#[test]
// A staged entry named ".mkit" must make `build_tree_from_index` fail with
// the `Io` error variant.
fn from_index_rejects_reserved_name() {
    let (_store_dir, store) = fresh_store();
    let blob = write_blob(&store, b"x");
    let offending = IndexEntry {
        path: ".mkit".into(),
        status: EntryStatus::Blob,
        object_hash: blob,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    };
    let mut index = Index::new();
    index.entries.push(offending);
    let failure = build_tree_from_index(&store, &index).unwrap_err();
    assert!(matches!(failure, WorktreeError::Io(_)));
}
1353
#[test]
// The same three files, once written to a directory and hashed with
// `build_tree` and once staged in an index, must yield the same root hash.
fn from_index_matches_build_tree_for_equivalent_worktree() {
    let (_store_dir, store) = fresh_store();

    // (relative path, file contents) shared by both halves of the test.
    let fixtures = [
        ("a.txt", &b"alpha"[..]),
        ("dir/b.txt", &b"beta"[..]),
        ("dir/c.txt", &b"gamma"[..]),
    ];

    // Worktree side: materialise the files on disk and hash the directory.
    let work = TempDir::new().unwrap();
    fs::create_dir(work.path().join("dir")).unwrap();
    for (rel, bytes) in fixtures {
        fs::write(work.path().join(rel), bytes).unwrap();
    }
    let worktree_root = build_tree(&store, work.path()).unwrap();

    // Index side: stage one blob entry per file.
    let mut index = Index::new();
    for (rel, bytes) in fixtures {
        let object_hash = write_blob(&store, bytes);
        index.entries.push(IndexEntry {
            path: rel.into(),
            status: EntryStatus::Blob,
            object_hash,
            mtime_ns: 0,
            size: 0,
            ino: 0,
            ctime_ns: 0,
        });
    }
    let index_root = build_tree_from_index(&store, &index).unwrap();

    assert_eq!(
        worktree_root, index_root,
        "build_tree_from_index must produce the same root hash as build_tree for equivalent contents"
    );
}
1412
#[test]
// A single entry at "a/b/c/d/e.txt" must produce a chain of single-entry
// trees a -> b -> c -> d, with the innermost tree holding the blob.
fn from_index_deeply_nested_paths_build_chain_of_subtrees() {
    let (_store_dir, store) = fresh_store();
    let leaf = write_blob(&store, b"deep");
    let mut index = Index::new();
    index.entries.push(IndexEntry {
        path: "a/b/c/d/e.txt".into(),
        status: EntryStatus::Blob,
        object_hash: leaf,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    });
    let root = build_tree_from_index(&store, &index).unwrap();

    // Root level: exactly one subtree named "a".
    let Object::Tree(top) = store.read_object(&root).unwrap() else {
        panic!();
    };
    assert_eq!(top.entries.len(), 1);
    assert_eq!(top.entries[0].name, b"a");
    assert_eq!(top.entries[0].mode, EntryMode::Tree);

    // Walk down through the intermediate directories.
    let mut cursor = top.entries[0].object_hash;
    for dir_name in ["b", "c", "d"] {
        let Object::Tree(level) = store.read_object(&cursor).unwrap() else {
            panic!();
        };
        assert_eq!(level.entries.len(), 1);
        assert_eq!(level.entries[0].name, dir_name.as_bytes());
        cursor = level.entries[0].object_hash;
    }

    // Innermost tree: the staged blob under its file name.
    let Object::Tree(innermost) = store.read_object(&cursor).unwrap() else {
        panic!();
    };
    assert_eq!(innermost.entries[0].name, b"e.txt");
    assert_eq!(innermost.entries[0].object_hash, leaf);
}
1450
#[test]
// Staging "a" as a blob and then "a/b" underneath it must be rejected; the
// error text is expected to mention a conflict/collision or the path 'a'.
fn from_index_rejects_blob_then_subdir_collision() {
    let (_store_dir, store) = fresh_store();
    let blob = write_blob(&store, b"x");
    let mut index = Index::new();
    // File first, then a path that needs the same name as a directory.
    for path in ["a", "a/b"] {
        index.entries.push(IndexEntry {
            path: path.into(),
            status: EntryStatus::Blob,
            object_hash: blob,
            mtime_ns: 0,
            size: 0,
            ino: 0,
            ctime_ns: 0,
        });
    }
    let err = build_tree_from_index(&store, &index).unwrap_err();
    let msg = err.to_string();
    let mentions_collision = ["conflict", "collision", "'a'"]
        .iter()
        .any(|needle| msg.contains(*needle));
    assert!(
        mentions_collision,
        "expected collision error mentioning the path, got: {msg}"
    );
}
1488
#[test]
// Mirror of the blob-then-subdir case: "a/b" is staged first and "a" as a
// blob second; the build must still fail.
fn from_index_rejects_subdir_then_blob_collision() {
    let (_store_dir, store) = fresh_store();
    let blob = write_blob(&store, b"x");
    let mut index = Index::new();
    for path in ["a/b", "a"] {
        index.entries.push(IndexEntry {
            path: path.into(),
            status: EntryStatus::Blob,
            object_hash: blob,
            mtime_ns: 0,
            size: 0,
            ino: 0,
            ctime_ns: 0,
        });
    }
    let outcome = build_tree_from_index(&store, &index);
    assert!(outcome.is_err());
}
1516
#[test]
// Two live entries with the identical path (but different blobs) must be
// rejected with a "duplicate index path" error.
fn from_index_rejects_duplicate_exact_path() {
    let (_store_dir, store) = fresh_store();
    let first = write_blob(&store, b"a");
    let second = write_blob(&store, b"b");
    let mut index = Index::new();
    for object_hash in [first, second] {
        index.entries.push(IndexEntry {
            path: "same.txt".into(),
            status: EntryStatus::Blob,
            object_hash,
            mtime_ns: 0,
            size: 0,
            ino: 0,
            ctime_ns: 0,
        });
    }

    let err = build_tree_from_index(&store, &index).unwrap_err();
    let msg = err.to_string();
    assert!(msg.contains("duplicate index path"), "got: {msg}");
}
1546
#[test]
// A `Removed` entry and a live `Blob` entry sharing one path must be
// rejected with a "duplicate index path" error.
fn from_index_rejects_duplicate_removed_and_live_path() {
    let (_store_dir, store) = fresh_store();
    let live = write_blob(&store, b"live");
    let mut index = Index::new();
    // Removed entry (all-zero hash) first, then the live one.
    let variants = [(EntryStatus::Removed, [0; 32]), (EntryStatus::Blob, live)];
    for (status, object_hash) in variants {
        index.entries.push(IndexEntry {
            path: "same.txt".into(),
            status,
            object_hash,
            mtime_ns: 0,
            size: 0,
            ino: 0,
            ctime_ns: 0,
        });
    }

    let err = build_tree_from_index(&store, &index).unwrap_err();
    let msg = err.to_string();
    assert!(msg.contains("duplicate index path"), "got: {msg}");
}
1575
#[test]
// An index whose only entry is `Removed` must build a tree with no entries.
fn from_index_all_removed_produces_empty_tree() {
    let (_store_dir, store) = fresh_store();
    let tombstone = IndexEntry {
        path: "gone.txt".into(),
        status: EntryStatus::Removed,
        object_hash: [0; 32],
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    };
    let mut index = Index::new();
    index.entries.push(tombstone);
    let root = build_tree_from_index(&store, &index).unwrap();
    let Object::Tree(tree) = store.read_object(&root).unwrap() else {
        panic!();
    };
    assert!(tree.entries.is_empty());
}
1601
#[test]
// Building from a freshly created index must store an object whose type is
// `ObjectType::Tree`.
fn from_index_root_is_a_tree_object() {
    let (_store_dir, store) = fresh_store();
    let index = Index::new();
    let root = build_tree_from_index(&store, &index).unwrap();
    let kind = store.read_object(&root).unwrap().object_type();
    assert_eq!(kind, ObjectType::Tree);
}
1612
#[test]
// An entry whose hash was never written to the store must surface as the
// `Store` error variant.
fn from_index_rejects_missing_blob_object() {
    let (_store_dir, store) = fresh_store();
    let dangling = IndexEntry {
        path: "missing.txt".into(),
        status: EntryStatus::Blob,
        // Arbitrary hash; nothing with this hash is written in this test.
        object_hash: [42; 32],
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    };
    let mut index = Index::new();
    index.entries.push(dangling);

    let failure = build_tree_from_index(&store, &index).unwrap_err();
    assert!(matches!(failure, WorktreeError::Store(_)));
}
1630
#[test]
// A `Blob`-status entry pointing at a stored tree object must be rejected
// with an error whose text contains "non-blob".
fn from_index_rejects_non_blob_object_for_blob_status() {
    let (_store_dir, store) = fresh_store();
    // Store an empty tree and stage its hash as if it were a file.
    let empty_tree = Object::Tree(Tree { entries: vec![] });
    let tree_hash = store
        .write(&serialize::serialize(&empty_tree).unwrap())
        .unwrap();
    let mut index = Index::new();
    index.entries.push(IndexEntry {
        path: "not-a-blob.txt".into(),
        status: EntryStatus::Blob,
        object_hash: tree_hash,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    });

    let err = build_tree_from_index(&store, &index).unwrap_err();
    let msg = err.to_string();
    assert!(
        msg.contains("non-blob"),
        "expected non-blob index object error, got: {msg}"
    );
}
1655
#[test]
// A `Blob`-status entry may reference a `ChunkedBlob` object: the tree must
// carry the chunked hash with `EntryMode::Blob`, and reading it back must
// return the original bytes.
fn from_index_accepts_chunked_blob_for_file_entry() {
    let (_store_dir, store) = fresh_store();
    // Payload 256 KiB larger than CHUNK_THRESHOLD.
    let n = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;

    // Deterministic pseudo-random payload: one low byte per step of a
    // 64-bit add-and-mix generator.
    let mut state: u64 = 0x00C0_FFEE;
    let big: Vec<u8> = (0..n)
        .map(|_| {
            state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
            let mut z = state;
            z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
            z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
            z ^= z >> 31;
            (z & 0xFF) as u8
        })
        .collect();

    let chunked_hash = store_file_object(&store, &big).unwrap();
    let stored = store.read_object(&chunked_hash).unwrap();
    assert!(
        matches!(stored, Object::ChunkedBlob(_)),
        "fixture must be a ChunkedBlob"
    );

    let mut index = Index::new();
    index.entries.push(IndexEntry {
        path: "big.bin".into(),
        status: EntryStatus::Blob,
        object_hash: chunked_hash,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    });
    let root = build_tree_from_index(&store, &index).unwrap();
    let Object::Tree(tree) = store.read_object(&root).unwrap() else {
        panic!("expected tree");
    };
    assert_eq!(tree.entries.len(), 1);
    let only = &tree.entries[0];
    assert_eq!(only.name, b"big.bin");
    assert_eq!(only.mode, EntryMode::Blob);
    assert_eq!(only.object_hash, chunked_hash);
    assert_eq!(read_blob(&store, &chunked_hash).unwrap(), big);
}
1708
#[test]
// A `Symlink`-status entry pointing at an object produced from an
// over-threshold payload must be rejected with a "non-blob" error.
fn from_index_rejects_chunked_blob_for_symlink_entry() {
    let (_store_dir, store) = fresh_store();
    let payload_len = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;
    let payload = vec![0xABu8; payload_len];
    let chunked_hash = store_file_object(&store, &payload).unwrap();
    let symlink_entry = IndexEntry {
        path: "link".into(),
        status: EntryStatus::Symlink,
        object_hash: chunked_hash,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    };
    let mut index = Index::new();
    index.entries.push(symlink_entry);
    let failure = build_tree_from_index(&store, &index).unwrap_err();
    assert!(failure.to_string().contains("non-blob"));
}
1730
#[test]
// `store_file_object` must return the same hash, and readable identical
// content, whether it writes through the store directly or through a batch.
// Checked for a 3 MiB payload and for a short one.
fn store_file_object_via_batch_equals_via_store() {
    let large: Vec<u8> = (0..3 * 1024 * 1024u32)
        .map(|i| u8::try_from((i.wrapping_mul(2_654_435_761)) % 251).unwrap())
        .collect();

    // Direct-to-store write.
    let (_dir_direct, direct) = fresh_store();
    let via_store = store_file_object(&direct, &large).unwrap();

    // Same payload through a batch on a second, independent store.
    let (_dir_batched, batched) = fresh_store();
    let large_batch = batched.batch();
    let via_batch = store_file_object(&large_batch, &large).unwrap();
    large_batch.commit().unwrap();

    assert_eq!(via_store, via_batch, "sink choice must not change hashes");
    assert_eq!(
        read_blob(&direct, &via_store).unwrap(),
        read_blob(&batched, &via_batch).unwrap(),
    );

    // Repeat with a short payload.
    let small = b"under the chunk threshold";
    let small_via_store = store_file_object(&direct, small).unwrap();
    let small_batch = batched.batch();
    let small_via_batch = store_file_object(&small_batch, small).unwrap();
    small_batch.commit().unwrap();
    assert_eq!(small_via_store, small_via_batch);
}
1766
#[test]
// Building a tree for 20 entries spread over nested directories through a
// batch must record exactly two `Ev::Full` sync events, and must produce
// the same root hash as the plain `build_tree_from_index` path.
fn build_tree_from_index_with_batch_single_flush() {
    use crate::batch::testing::{Ev, RecordingSyncer};
    use crate::index::{EntryStatus, Index, IndexEntry};
    use std::sync::Arc;

    // Serialized blob object for the i-th fixture file.
    let serialized_blob = |i: i32| {
        let blob = Object::Blob(crate::object::Blob {
            data: format!("file {i}").into_bytes(),
        });
        serialize::serialize(&blob).unwrap()
    };

    let (_store_dir, mut store) = fresh_store();
    let mut index = Index::default();
    for i in 0..20 {
        let object_hash = store.write(&serialized_blob(i)).unwrap();
        index.entries.push(IndexEntry {
            status: EntryStatus::Blob,
            object_hash,
            path: format!("d{}/sub/f{i}.txt", i % 5),
            mtime_ns: 0,
            size: 0,
            ino: 0,
            ctime_ns: 0,
        });
    }

    // Install the recorder only after the blobs are written, so it sees
    // just the syncs issued from here on.
    let rec = Arc::new(RecordingSyncer::default());
    store.set_syncer(rec.clone());

    let batch = store.batch();
    let tree_h = build_tree_from_index_with(&store, &batch, &index, true).unwrap();
    batch.commit().unwrap();

    let full_syncs = rec
        .events()
        .iter()
        .filter(|event| matches!(event, Ev::Full(_)))
        .count();
    assert_eq!(full_syncs, 2, "tree materialisation flush cost must be constant");
    assert!(store.read_object(&tree_h).is_ok());

    // Reference run: same blobs in a second store, non-batched build.
    let (_reference_dir, reference) = fresh_store();
    for i in 0..20 {
        reference.write(&serialized_blob(i)).unwrap();
    }
    assert_eq!(tree_h, build_tree_from_index(&reference, &index).unwrap());
}
1820
#[test]
// With the final flag set to `true`, `build_tree_from_index_with` must fail
// once the staged object's file has been corrupted on disk; with `false` it
// must still succeed. Both modes succeed on the intact object.
fn build_tree_from_index_verify_rejects_corrupt_staged_object() {
    use crate::index::{EntryStatus, Index, IndexEntry};

    let (_store_dir, store) = fresh_store();
    let hello = Object::Blob(crate::object::Blob {
        data: b"hello".to_vec(),
    });
    let staged = store.write(&serialize::serialize(&hello).unwrap()).unwrap();
    let mut index = Index::default();
    index.entries.push(IndexEntry {
        status: EntryStatus::Blob,
        object_hash: staged,
        path: "a.txt".to_string(),
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    });

    // Baseline: the intact object passes in both modes.
    assert!(build_tree_from_index_with(&store, &store, &index, true).is_ok());
    assert!(build_tree_from_index_with(&store, &store, &index, false).is_ok());

    // Corrupt the object file in place by inverting its last byte.
    let object_path = store.path_for(&staged);
    let mut raw = std::fs::read(&object_path).unwrap();
    *raw.last_mut().unwrap() ^= 0xFF;
    std::fs::write(&object_path, &raw).unwrap();

    let verified = build_tree_from_index_with(&store, &store, &index, true);
    assert!(
        verified.is_err(),
        "commit-path tree build must reject a corrupt staged object"
    );
    let unverified = build_tree_from_index_with(&store, &store, &index, false);
    assert!(
        unverified.is_ok(),
        "status/diff snapshot path keeps the cheap prologue-only check"
    );
}
1867
#[test]
// `hash_file_object` must return the same hash `store_file_object` returns
// for the same bytes, across lengths 0, 1, 1024, CHUNK_THRESHOLD and 3 MiB.
fn hash_file_object_equals_store_file_object() {
    let threshold = usize::try_from(CHUNK_THRESHOLD).unwrap();
    let lengths = [0usize, 1, 1024, threshold, 3 * 1024 * 1024];
    for len in lengths {
        // Fresh store per length.
        let (_store_dir, store) = fresh_store();
        let data = (0..len)
            .map(|i| u8::try_from((i * 31 + 7) % 251).unwrap())
            .collect::<Vec<u8>>();
        let stored = store_file_object(&store, &data).unwrap();
        let pure = hash_file_object(&data).unwrap();
        assert_eq!(stored, pure, "len {len}: pure hash must match stored hash");
    }
}
1885
#[test]
// Hashing a 2 MiB buffer with `hash_file_object` must leave a fresh store
// without any objects.
fn hash_file_object_writes_nothing() {
    let (_store_dir, store) = fresh_store();
    let payload = vec![0xAB; 2 * 1024 * 1024];
    let _ = hash_file_object(&payload).unwrap();
    let stored_hashes = store.iter_object_hashes().unwrap();
    assert!(
        stored_hashes.is_empty(),
        "pure hashing must not create objects"
    );
}
1896
// Test helper: metadata of `p` itself, without following a symlink.
// Panics if the metadata cannot be read.
fn meta_of(p: &Path) -> fs::Metadata {
    fs::symlink_metadata(p).unwrap()
}
1900
#[test]
// `stat_matches` accepts an entry whose mtime and size equal the file's,
// and rejects it when mtime is zero, when size differs, or when mtime
// differs.
fn stat_matches_requires_nonzero_mtime_and_equal_fields() {
    let work = TempDir::new().unwrap();
    let file = work.path().join("a.txt");
    fs::write(&file, b"hello").unwrap();
    let meta = meta_of(&file);
    let entry = crate::index::IndexEntry {
        path: "a.txt".into(),
        status: crate::index::EntryStatus::Blob,
        object_hash: crate::hash::hash(b"irrelevant"),
        mtime_ns: mtime_nanos(&meta),
        size: meta.len(),
        ino: 0,
        ctime_ns: 0,
    };
    assert!(stat_matches(&entry, &meta));

    // mtime of zero never matches.
    let zeroed = crate::index::IndexEntry {
        mtime_ns: 0,
        ..entry.clone()
    };
    assert!(!stat_matches(&zeroed, &meta), "zero sentinel must re-hash");

    // Size off by one.
    let wrong_size = crate::index::IndexEntry {
        size: entry.size + 1,
        ..entry.clone()
    };
    assert!(!stat_matches(&wrong_size, &meta));

    // mtime with its lowest bit flipped.
    let wrong_time = crate::index::IndexEntry {
        mtime_ns: entry.mtime_ns ^ 1,
        ..entry.clone()
    };
    assert!(!stat_matches(&wrong_time, &meta));
}
1933
#[cfg(unix)]
#[test]
// After chmod 0o755 with the mtime restored to its previous value, a
// `Blob`-status entry recorded before the chmod must no longer stat-match.
fn stat_matches_detects_exec_bit_flip() {
    use std::os::unix::fs::PermissionsExt;
    let work = TempDir::new().unwrap();
    let script = work.path().join("run.sh");
    fs::write(&script, b"#!/bin/sh\n").unwrap();
    let before = meta_of(&script);
    let entry = crate::index::IndexEntry {
        path: "run.sh".into(),
        status: crate::index::EntryStatus::Blob,
        object_hash: crate::hash::hash(b"x"),
        mtime_ns: mtime_nanos(&before),
        size: before.len(),
        ino: 0,
        ctime_ns: 0,
    };
    assert!(stat_matches(&entry, &before));

    // Change the mode, then put the old mtime back so only the mode differs
    // among the fields this entry records.
    let original_mtime = before.modified().unwrap();
    fs::set_permissions(&script, fs::Permissions::from_mode(0o755)).unwrap();
    {
        let handle = fs::File::options().write(true).open(&script).unwrap();
        handle
            .set_times(fs::FileTimes::new().set_modified(original_mtime))
            .unwrap();
    }

    let after = meta_of(&script);
    assert_eq!(mtime_nanos(&after), entry.mtime_ns, "mtime restored");
    assert!(
        !stat_matches(&entry, &after),
        "exec-bit flip must invalidate a Blob-status cache hit"
    );
}
1968
#[cfg(unix)]
#[test]
// With an index entry whose mtime and size match the file, the filtered
// build must succeed even while the file's mode is 0o000, reuse the staged
// hash, and yield the same root as an unfiltered build of the same content.
fn build_tree_reuses_hash_on_stat_match_without_reading_file() {
    use std::os::unix::fs::PermissionsExt;
    let (_store_dir, store) = fresh_store();
    let work = TempDir::new().unwrap();
    let locked = work.path().join("locked.txt");
    fs::write(&locked, b"cached content").unwrap();

    let staged_hash = store_file_object(&store, b"cached content").unwrap();
    let meta = meta_of(&locked);
    let cached_entry = crate::index::IndexEntry {
        path: "locked.txt".into(),
        status: crate::index::EntryStatus::Blob,
        object_hash: staged_hash,
        mtime_ns: mtime_nanos(&meta),
        size: meta.len(),
        ino: 0,
        ctime_ns: 0,
    };
    let index = crate::index::Index {
        entries: vec![cached_entry],
    };

    // Drop all permission bits for the build, and restore them before
    // inspecting the result.
    fs::set_permissions(&locked, fs::Permissions::from_mode(0o000)).unwrap();
    let outcome = build_tree_filtered(&store, work.path(), Some(&index));
    fs::set_permissions(&locked, fs::Permissions::from_mode(0o644)).unwrap();
    let tree_h = outcome.expect("stat match must skip the file read");

    let Object::Tree(tree) = store.read_object(&tree_h).unwrap() else {
        panic!("expected tree");
    };
    assert_eq!(tree.entries.len(), 1);
    assert_eq!(tree.entries[0].object_hash, staged_hash);

    // Reference: unfiltered build of identical content in a fresh store.
    let (_reference_dir, reference_store) = fresh_store();
    let reference_work = TempDir::new().unwrap();
    fs::write(reference_work.path().join("locked.txt"), b"cached content").unwrap();
    let plain = build_tree(&reference_store, reference_work.path()).unwrap();
    assert_eq!(plain, tree_h, "cache hit must not change tree hashes");
}
2014
#[cfg(unix)]
#[test]
// A same-size replacement renamed over the original, with the original
// mtime copied onto it, must not stat-match the entry recorded for the
// original file.
fn stat_mismatch_on_inode_rehashes() {
    let work = TempDir::new().unwrap();
    let target = work.path().join("swap.txt");
    fs::write(&target, b"original").unwrap();
    let before = meta_of(&target);
    let (mtime_ns, size, ino, ctime_ns) = stat_cache_fields(&before);
    let entry = crate::index::IndexEntry {
        path: "swap.txt".into(),
        status: crate::index::EntryStatus::Blob,
        object_hash: crate::hash::hash(b"original"),
        mtime_ns,
        size,
        ino,
        ctime_ns,
    };
    assert!(stat_matches(&entry, &before));

    // Write a same-length replacement next to the original, copy the
    // original mtime onto it, then rename it into place.
    let staging = work.path().join(".swap.new");
    fs::write(&staging, b"REPLACED").unwrap();
    {
        let handle = fs::File::options().write(true).open(&staging).unwrap();
        handle
            .set_times(fs::FileTimes::new().set_modified(before.modified().unwrap()))
            .unwrap();
    }
    fs::rename(&staging, &target).unwrap();

    let after = meta_of(&target);
    assert_eq!(after.len(), entry.size, "size preserved by the swap");
    assert!(
        !stat_matches(&entry, &after),
        "a renamed-in replacement must not stat-match (ino differs)"
    );
}
2052
#[test]
// An entry matching the file in every stat field except ctime (lowest bit
// flipped) must not stat-match. The test returns early when the reported
// ctime is zero.
fn stat_mismatch_on_ctime_rehashes() {
    let work = TempDir::new().unwrap();
    let file = work.path().join("touched.txt");
    fs::write(&file, b"content").unwrap();
    let meta = meta_of(&file);
    let (mtime_ns, size, ino, ctime_ns) = stat_cache_fields(&meta);
    if ctime_ns == 0 {
        // Nothing to perturb when ctime is reported as zero.
        return;
    }
    let skewed = crate::index::IndexEntry {
        path: "touched.txt".into(),
        status: crate::index::EntryStatus::Blob,
        object_hash: crate::hash::hash(b"content"),
        mtime_ns,
        size,
        ino,
        ctime_ns: ctime_ns ^ 1,
    };
    assert!(
        !stat_matches(&skewed, &meta),
        "ctime disagreement must invalidate the cache"
    );
}
2080
#[test]
// Both entries carry zeroed stat fields. "clean.txt" is staged with the
// hash of its actual bytes; "dirty.txt" with a hash of different content.
// Exactly one observation is expected, for "clean.txt", carrying that
// file's current mtime and size.
fn build_tree_observed_reports_clean_rehashes() {
    let (_store_dir, store) = fresh_store();
    let work = TempDir::new().unwrap();
    fs::write(work.path().join("clean.txt"), b"clean bytes").unwrap();
    fs::write(work.path().join("dirty.txt"), b"new content").unwrap();

    let clean_hash = store_file_object(&store, b"clean bytes").unwrap();
    let stale_hash = crate::hash::hash(b"old content");
    let clean_entry = crate::index::IndexEntry {
        path: "clean.txt".into(),
        status: crate::index::EntryStatus::Blob,
        object_hash: clean_hash,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    };
    let dirty_entry = crate::index::IndexEntry {
        path: "dirty.txt".into(),
        status: crate::index::EntryStatus::Blob,
        object_hash: stale_hash,
        mtime_ns: 0,
        size: 0,
        ino: 0,
        ctime_ns: 0,
    };
    let index = crate::index::Index {
        entries: vec![clean_entry, dirty_entry],
    };

    let mut observations = Vec::new();
    build_tree_filtered_observed(&store, work.path(), Some(&index), &mut observations).unwrap();

    assert_eq!(
        observations.len(),
        1,
        "only the verified-clean entry is observed"
    );
    let seen = &observations[0];
    assert_eq!(seen.path, "clean.txt");
    assert_eq!(seen.object_hash, clean_hash);

    // The observation must carry the file's current stat values.
    let current = meta_of(&work.path().join("clean.txt"));
    let (mtime_ns, size, _ino, _ctime) = stat_cache_fields(&current);
    assert_eq!(seen.mtime_ns, mtime_ns, "observation carries the fd stat");
    assert_eq!(seen.size, size);
}
2128
#[test]
// When the recorded size disagrees with the file, the filtered build must
// ignore the staged hash and store the hash of the file's real content.
fn build_tree_rehashes_on_stat_mismatch() {
    let (_store_dir, store) = fresh_store();
    let work = TempDir::new().unwrap();
    let changed = work.path().join("changed.txt");
    fs::write(&changed, b"new content").unwrap();
    let stale_hash = crate::hash::hash(b"not the real object");
    let meta = meta_of(&changed);
    let stale_entry = crate::index::IndexEntry {
        path: "changed.txt".into(),
        status: crate::index::EntryStatus::Blob,
        object_hash: stale_hash,
        mtime_ns: mtime_nanos(&meta),
        // One byte more than the real size, so the stat check cannot match.
        size: meta.len() + 1,
        ino: 0,
        ctime_ns: 0,
    };
    let index = crate::index::Index {
        entries: vec![stale_entry],
    };

    let tree_h = build_tree_filtered(&store, work.path(), Some(&index)).unwrap();
    let Object::Tree(tree) = store.read_object(&tree_h).unwrap() else {
        panic!("expected tree");
    };
    let recorded = tree.entries[0].object_hash;
    assert_ne!(
        recorded, stale_hash,
        "mismatched stat must not reuse the stale hash"
    );
    assert_eq!(
        recorded,
        store_file_object(&store, b"new content").unwrap()
    );
}
2163}