Skip to main content

mkit_core/
worktree.rs

1//! Worktree → tree-object builder.
2//!
3//! Walks a directory, applies `.mkitignore`, hashes each file as a
4//! [`Blob`](crate::object::Blob), recurses on subdirectories, validates
5//! symlink targets against path-traversal, and writes a single root
6//! [`Tree`] into the supplied [`ObjectStore`].
7//!
8//! Notes:
9//!
10//! - Files at or below [`CHUNK_THRESHOLD`] are stored as a single
11//!   [`Blob`](crate::object::Blob). Files above the threshold are
12//!   chunked with [`crate::chunker::FastCdc::v1`]; each chunk is
13//!   stored as a `Blob` and the file is represented by a
14//!   [`ChunkedBlob`] manifest whose hash
15//!   is what lands in the parent tree.
16//! - We never follow symlinks while walking. Linux/macOS `read_link`
17//!   reports the target verbatim and we hash it as a blob.
18
19use std::fs;
20use std::io::{self, Read};
21use std::path::{Path, PathBuf};
22
23use crate::chunker::{ChunkIterator, FastCdc};
24use crate::hash::Hash;
25use crate::ignore::{self, IgnoreList};
26use crate::index::{self, Index};
27use crate::object::{ChunkedBlob, EntryMode, Object, Tree, TreeEntry};
28use crate::serialize;
29use crate::store::ObjectStore;
30
/// Size threshold in bytes (1 MiB) above which file content is chunked.
///
/// Content whose length is less than or equal to this value is stored as a
/// single blob; strictly larger content goes through the chunker.
pub const CHUNK_THRESHOLD: u64 = 1024 * 1024;

/// Hard cap on the size of a single file, in bytes (1 GiB).
///
/// Files larger than this are rejected with
/// [`WorktreeError::FileTooLarge`] when they are read from disk.
pub const MAX_FILE_BYTES: u64 = 1024 * 1024 * 1024;
36
/// Errors returned by this module.
///
/// The `Io`, `Object`, and `Store` variants are transparent wrappers with
/// `From` conversions, so `?` can be applied directly to the underlying
/// results inside this module.
#[derive(Debug, thiserror::Error)]
pub enum WorktreeError {
    /// `read_link` returned a target that fails [`validate_symlink_target`].
    /// Carries the offending target string.
    #[error("symlink target '{0}' is invalid (absolute or contains '..')")]
    InvalidSymlinkTarget(String),
    /// File exceeded [`MAX_FILE_BYTES`]. Carries the path that was read.
    #[error("file '{0}' exceeds the {MAX_FILE_BYTES} byte limit")]
    FileTooLarge(PathBuf),
    /// Path component had non-UTF-8 bytes; tree entry names must be UTF-8.
    /// Also returned for a symlink target that is not valid UTF-8.
    #[error("path component is not valid UTF-8")]
    InvalidUtf8,
    /// Underlying I/O failure. This module also uses it for validation
    /// failures that have no dedicated variant (invalid tree entry names,
    /// index path conflicts, unexpected object kinds).
    #[error(transparent)]
    Io(#[from] io::Error),
    /// Error encoding/serialising an object on its way into the store.
    #[error(transparent)]
    Object(#[from] crate::object::MkitError),
    /// Error returned by the object store.
    #[error(transparent)]
    Store(#[from] crate::store::StoreError),
}
59
/// Result alias used throughout this module; the error type is always
/// [`WorktreeError`].
pub type WorktreeResult<T> = Result<T, WorktreeError>;
62
/// Check whether a symlink target is acceptable for storage.
///
/// Returns `true` only when `target` is non-empty, does not begin with
/// `/`, and none of its `/`-separated segments is exactly `..`.
#[must_use]
pub fn validate_symlink_target(target: &str) -> bool {
    let is_relative = !target.starts_with('/');
    let stays_inside = || target.split('/').all(|segment| segment != "..");
    !target.is_empty() && is_relative && stays_inside()
}
80
81/// Build a tree object for `dir` and its subdirectories. Honours the
82/// `.gitignore` + `.mkitignore` ignore files loaded from `dir`.
83///
84/// Ignore rules only exclude **untracked** content: a path that is tracked
85/// (or whose subtree holds tracked content) is always included even if it
86/// matches an ignore rule, so a tracked file matching `.gitignore` is never
87/// dropped from the worktree snapshot (which would misreport it as a deletion
88/// in status/diff). The staging index at `<dir>/.mkit/index` provides the
89/// tracked set; an absent index means nothing is tracked.
90///
91/// # Errors
92/// See [`WorktreeError`].
93pub fn build_tree(store: &ObjectStore, dir: &Path) -> WorktreeResult<Hash> {
94    build_tree_filtered(store, dir, None)
95}
96
97/// Like [`build_tree`], but the caller supplies the authoritative tracked
98/// set (`index`). Callers that seed their index from `HEAD` when no index
99/// file exists yet (status, restore safety) MUST pass it here so a tracked
100/// file that matches an ignore rule is not dropped right after a checkout.
101/// `None` falls back to the on-disk `<dir>/.mkit/index` (empty if absent).
102///
103/// # Errors
104/// See [`WorktreeError`].
105pub fn build_tree_filtered(
106    store: &ObjectStore,
107    dir: &Path,
108    index: Option<&Index>,
109) -> WorktreeResult<Hash> {
110    let ignores = ignore::load(dir).map_err(|e| match e {
111        crate::ignore::IgnoreError::Io(io) => WorktreeError::Io(io),
112        crate::ignore::IgnoreError::FileTooLarge => {
113            WorktreeError::Io(io::Error::other("ignore file exceeds 1 MiB"))
114        }
115    })?;
116    // Tracked set for ignore exemption: the caller's index if given, else the
117    // on-disk index (missing/unreadable = empty = nothing tracked).
118    let loaded;
119    let index = if let Some(i) = index {
120        i
121    } else {
122        loaded = index::read_index(dir).unwrap_or_default();
123        &loaded
124    };
125    build_tree_inner(store, dir, "", &ignores, index, false)
126}
127
128/// `rel_dir` is the path of `dir` relative to the repo root (empty at the
129/// root), so ignore patterns can be matched against full repo-relative paths
130/// rather than bare basenames. `parent_ignored` carries down whether an
131/// ancestor directory is ignored (git "everything under an excluded dir is
132/// excluded"); `index` is the tracked set used to exempt tracked content.
133fn build_tree_inner(
134    store: &ObjectStore,
135    dir: &Path,
136    rel_dir: &str,
137    ignores: &IgnoreList,
138    index: &Index,
139    parent_ignored: bool,
140) -> WorktreeResult<Hash> {
141    let mut entries: Vec<TreeEntry> = Vec::new();
142
143    for entry in fs::read_dir(dir)? {
144        let entry = entry?;
145        let file_name = entry.file_name();
146        let name_str = file_name
147            .to_str()
148            .ok_or(WorktreeError::InvalidUtf8)?
149            .to_string();
150        // `symlink_metadata` does not follow symlinks.
151        let meta = entry.path().symlink_metadata()?;
152        let is_dir = meta.is_dir();
153        let rel_path = if rel_dir.is_empty() {
154            name_str.clone()
155        } else {
156            format!("{rel_dir}/{name_str}")
157        };
158        // Exclude ignored content, but only when it is UNTRACKED — a tracked
159        // path (or a dir holding tracked content) is always kept so status/
160        // diff see it. An ignored dir with tracked content is descended into
161        // (carrying the ignored bit) so its untracked children stay excluded.
162        let entry_ignored = parent_ignored || ignores.is_ignored(&rel_path, is_dir);
163        if entry_ignored && !index.tracks_path_or_descendant(&rel_path) {
164            continue;
165        }
166
167        let name_bytes = name_str.as_bytes();
168        if !TreeEntry::validate_name(name_bytes) {
169            return Err(WorktreeError::Io(io::Error::new(
170                io::ErrorKind::InvalidInput,
171                format!("invalid tree entry name: {name_str:?}"),
172            )));
173        }
174
175        if meta.file_type().is_file() {
176            let (h, opened_meta) = hash_file_with_metadata(store, &entry.path())?;
177            entries.push(TreeEntry {
178                name: name_str.into_bytes(),
179                mode: entry_mode_from_file_metadata(&opened_meta),
180                object_hash: h,
181            });
182        } else if meta.file_type().is_dir() {
183            // A directory on disk at a path tracked as a *file* shadows that
184            // tracked entry. git reports only the tracked-side deletion and
185            // suppresses the directory's contents as untracked (#288); mirror
186            // that by leaving the whole subtree out of the snapshot, so the
187            // tracked file reads as deleted and nothing inside surfaces.
188            if index.has_tracked_file_at(&rel_path) {
189                continue;
190            }
191            let h = build_tree_inner(
192                store,
193                &entry.path(),
194                &rel_path,
195                ignores,
196                index,
197                entry_ignored,
198            )?;
199            entries.push(TreeEntry {
200                name: name_str.into_bytes(),
201                mode: EntryMode::Tree,
202                object_hash: h,
203            });
204        } else if meta.file_type().is_symlink() {
205            let target = fs::read_link(entry.path())?;
206            let target_str = target
207                .to_str()
208                .ok_or(WorktreeError::InvalidUtf8)?
209                .to_string();
210            if !validate_symlink_target(&target_str) {
211                return Err(WorktreeError::InvalidSymlinkTarget(target_str));
212            }
213            let blob = Object::Blob(crate::object::Blob {
214                data: target_str.as_bytes().to_vec(),
215            });
216            let bytes = serialize::serialize(&blob)?;
217            let h = store.write(&bytes)?;
218            entries.push(TreeEntry {
219                name: name_str.into_bytes(),
220                mode: EntryMode::Symlink,
221                object_hash: h,
222            });
223        } else {
224            // Block / char / fifo / socket — silently skip.
225        }
226    }
227
228    entries.sort_by(|a, b| a.name.cmp(&b.name));
229    let tree = Object::Tree(Tree { entries });
230    let bytes = serialize::serialize(&tree)?;
231    Ok(store.write(&bytes)?)
232}
233
/// Build a tree object from an [`Index`] (the staging area).
///
/// Walks the flat list of entries, groups them by directory, and
/// recursively materialises sub-tree objects so the on-disk shape
/// matches what [`build_tree`] would produce for the same set of
/// paths. Entries with [`crate::index::EntryStatus::Removed`] are
/// excluded; everything else maps to an [`EntryMode`] one-to-one.
///
/// A file entry (Blob/Executable) may address either a single
/// [`Blob`](crate::object::Blob) or, for content above
/// [`CHUNK_THRESHOLD`], a [`ChunkedBlob`]
/// manifest — exactly the two shapes `store_file_object` (and hence
/// `add`/`hash_file`/`build_tree`) can produce. Symlink entries must be
/// a single `Blob`. Any other object kind under a file entry is rejected.
///
/// Returns the hash of the root tree object written to `store`.
///
/// # Errors
/// - [`WorktreeError::Io`] on a [`crate::object::TreeEntry::validate_name`]
///   failure (the path's leaf segment is reserved or alias-prone), or
///   when a file entry points at a non-blob/non-chunked-blob object.
/// - [`WorktreeError::Io`] when the index lists the same path twice, uses
///   the reserved `Tree` status, contains an empty path segment or a
///   trailing slash, or stages one path as both a file and a directory.
/// - Wraps [`crate::MkitError`] surfaced by `serialize` / `store.write`.
#[allow(clippy::items_after_statements, clippy::too_many_lines)]
pub fn build_tree_from_index(
    store: &ObjectStore,
    index: &crate::index::Index,
) -> WorktreeResult<Hash> {
    use crate::index::EntryStatus;

    // Build an in-memory directory tree. Each node is either a leaf
    // (one staged blob/symlink) or a directory containing children.
    #[derive(Default)]
    struct Node {
        // Subdirectory name → child node.
        children: std::collections::BTreeMap<String, Node>,
        // Leaf entries directly under this dir: name → (mode, hash).
        leaves: std::collections::BTreeMap<String, (EntryMode, Hash)>,
    }

    let mut root = Node::default();
    // Full paths seen so far. The check runs before the `Removed` filter,
    // so a path listed twice is rejected even if one listing is a removal.
    let mut seen_paths = std::collections::HashSet::with_capacity(index.entries.len());

    for entry in &index.entries {
        if !seen_paths.insert(entry.path.as_str()) {
            return Err(WorktreeError::Io(io::Error::other(format!(
                "duplicate index path: '{}'",
                entry.path
            ))));
        }
        if entry.status == EntryStatus::Removed {
            continue;
        }
        let mode = match entry.status {
            EntryStatus::Blob => EntryMode::Blob,
            EntryStatus::Executable => EntryMode::Executable,
            EntryStatus::Symlink => EntryMode::Symlink,
            EntryStatus::Tree => {
                // Reserved-but-unused per SPEC-INDEX §3. Reject for
                // now; if a subtree-staging design lands later it
                // can populate this branch.
                return Err(WorktreeError::Io(io::Error::other(
                    "index entry uses reserved Tree status (subtree staging not implemented)",
                )));
            }
            EntryStatus::Removed => unreachable!("filtered above"),
        };
        // A regular file (Blob/Executable) may be stored as a single
        // Blob or, for content above CHUNK_THRESHOLD, a ChunkedBlob
        // manifest — `add`/`hash_file`/`build_tree` all route through
        // `store_file_object`. A Symlink is always a single Blob (its
        // target path). Accept both blob shapes for file entries so the
        // commit/index path agrees with the worktree-hashing path; a
        // tree/commit/etc. under a file entry is still rejected.
        match store.read_object(&entry.object_hash)? {
            Object::Blob(_) => {}
            Object::ChunkedBlob(_) if mode != EntryMode::Symlink => {}
            other => {
                return Err(WorktreeError::Io(io::Error::other(format!(
                    "index entry '{}' points to a non-blob object (got {})",
                    entry.path,
                    other.object_type().name()
                ))));
            }
        }

        // Split "a/b/c.txt" into ["a", "b"] + "c.txt".
        let segments: Vec<&str> = entry.path.split('/').collect();
        let Some((leaf, dirs)) = segments.split_last() else {
            return Err(WorktreeError::Io(io::Error::other("empty index path")));
        };
        // An empty path also lands here: splitting "" yields one empty
        // segment, which becomes the leaf.
        if leaf.is_empty() {
            return Err(WorktreeError::Io(io::Error::other(
                "trailing slash in index path",
            )));
        }

        let mut node = &mut root;
        // Path of the directories descended so far, joined with '/'; used
        // only to build error messages.
        let mut walked = String::new();
        for seg in dirs {
            if seg.is_empty() {
                return Err(WorktreeError::Io(io::Error::other(
                    "empty path segment in index",
                )));
            }
            // Collision: this segment was previously staged as a blob
            // (e.g. earlier index entry was `a` as a file, this one
            // is `a/b`). Tree object format requires unique entry
            // names per directory; emitting both would produce an
            // invalid tree the deserializer rejects under its strict
            // ascending-name rule.
            if node.leaves.contains_key(*seg) {
                let conflicting = if walked.is_empty() {
                    (*seg).to_string()
                } else {
                    format!("{walked}/{seg}")
                };
                return Err(WorktreeError::Io(io::Error::other(format!(
                    "index path conflict: '{conflicting}' is staged as both a file and a directory"
                ))));
            }
            walked = if walked.is_empty() {
                (*seg).to_string()
            } else {
                format!("{walked}/{seg}")
            };
            // Descend, creating the directory node on first use.
            node = node.children.entry((*seg).to_string()).or_default();
        }
        // The reverse collision: this entry's leaf name already exists
        // as a child directory under the same parent (an earlier
        // entry staged `a/b` and now this one stages `a` as a file).
        if node.children.contains_key(*leaf) {
            let conflicting = if walked.is_empty() {
                (*leaf).to_string()
            } else {
                format!("{walked}/{leaf}")
            };
            return Err(WorktreeError::Io(io::Error::other(format!(
                "index path conflict: '{conflicting}' is staged as both a file and a directory"
            ))));
        }
        // `insert` returning `Some` means a leaf with this name was already
        // recorded under the same parent directory.
        if node
            .leaves
            .insert((*leaf).to_string(), (mode, entry.object_hash))
            .is_some()
        {
            let duplicate = if walked.is_empty() {
                (*leaf).to_string()
            } else {
                format!("{walked}/{leaf}")
            };
            return Err(WorktreeError::Io(io::Error::other(format!(
                "duplicate index path: '{duplicate}'"
            ))));
        }
    }

    // Recursively serialise `node` and its descendants into tree objects
    // (children first, so their hashes are known), returning the hash of
    // the tree written for `node` itself.
    fn write_node(store: &ObjectStore, node: &Node) -> WorktreeResult<Hash> {
        let mut entries: Vec<TreeEntry> = Vec::new();

        // Subdirectories first (alphabetical via BTreeMap).
        for (name, child) in &node.children {
            let h = write_node(store, child)?;
            let bytes = name.as_bytes().to_vec();
            if !crate::object::TreeEntry::validate_name(&bytes) {
                return Err(WorktreeError::Io(io::Error::other(format!(
                    "invalid tree entry name: {name:?}"
                ))));
            }
            entries.push(TreeEntry {
                name: bytes,
                mode: EntryMode::Tree,
                object_hash: h,
            });
        }

        // Then leaves.
        for (name, (mode, hash)) in &node.leaves {
            let bytes = name.as_bytes().to_vec();
            if !crate::object::TreeEntry::validate_name(&bytes) {
                return Err(WorktreeError::Io(io::Error::other(format!(
                    "invalid tree entry name: {name:?}"
                ))));
            }
            entries.push(TreeEntry {
                name: bytes,
                mode: *mode,
                object_hash: *hash,
            });
        }

        // Tree-entry order is name-ascending per SPEC-OBJECTS §4.
        // Directories and leaves were appended as two separate runs, so
        // the combined list still needs one sort.
        entries.sort_by(|a, b| a.name.cmp(&b.name));
        let tree = Object::Tree(Tree { entries });
        let bytes = serialize::serialize(&tree)?;
        Ok(store.write(&bytes)?)
    }

    write_node(store, &root)
}
431
432/// Read a file from disk, hash it, store it, and return the
433/// content-address of the resulting object.
434///
435/// Files at or below [`CHUNK_THRESHOLD`] become a single
436/// [`Blob`](crate::object::Blob). Files above the threshold are split
437/// with [`FastCdc::v1`]; each chunk is stored as a `Blob`, and the
438/// file is represented by a [`ChunkedBlob`]
439/// manifest whose hash is returned and lands in the parent tree. See
440/// `SPEC-FASTCDC.md` and `SPEC-OBJECTS.md` §7.
441///
442/// # Errors
443/// See [`WorktreeError`].
444pub fn hash_file(store: &ObjectStore, path: &Path) -> WorktreeResult<Hash> {
445    hash_file_with_metadata(store, path).map(|(hash, _)| hash)
446}
447
448/// Read a regular file without following the final path component on
449/// Unix, enforcing [`MAX_FILE_BYTES`] against both the opened handle's
450/// metadata and the actual bytes read.
451pub fn read_regular_file_bounded(path: &Path) -> WorktreeResult<(fs::Metadata, Vec<u8>)> {
452    let mut file = open_regular_file(path)?;
453    let meta = file.metadata()?;
454    if !meta.file_type().is_file() {
455        return Err(WorktreeError::Io(io::Error::new(
456            io::ErrorKind::InvalidInput,
457            "path is not a regular file",
458        )));
459    }
460    if meta.len() > MAX_FILE_BYTES {
461        return Err(WorktreeError::FileTooLarge(path.to_path_buf()));
462    }
463    let initial_capacity = usize::try_from(meta.len().min(CHUNK_THRESHOLD))
464        .map_err(|_| WorktreeError::FileTooLarge(path.to_path_buf()))?;
465    let mut data = Vec::with_capacity(initial_capacity);
466    file.by_ref()
467        .take(MAX_FILE_BYTES + 1)
468        .read_to_end(&mut data)?;
469    if u64::try_from(data.len()).unwrap_or(u64::MAX) > MAX_FILE_BYTES {
470        return Err(WorktreeError::FileTooLarge(path.to_path_buf()));
471    }
472    Ok((meta, data))
473}
474
475fn hash_file_with_metadata(
476    store: &ObjectStore,
477    path: &Path,
478) -> WorktreeResult<(Hash, fs::Metadata)> {
479    let (meta, data) = read_regular_file_bounded(path)?;
480    let hash = store_file_object(store, &data)?;
481    Ok((hash, meta))
482}
483
484/// Store a regular file's bytes as the canonical object and return its
485/// content-address.
486///
487/// This is the single source of truth for how file content maps to an
488/// object hash, shared by [`hash_file`], [`build_tree`], and `mkit add`
489/// so all three agree on the representation:
490///
491/// - At or below [`CHUNK_THRESHOLD`]: a single
492///   [`Blob`](crate::object::Blob).
493/// - Above the threshold: `FastCdc::v1` chunks, each stored as a `Blob`,
494///   addressed by a [`ChunkedBlob`] manifest.
495///
496/// # Errors
497/// See [`WorktreeError`].
498pub fn store_file_object(store: &ObjectStore, data: &[u8]) -> WorktreeResult<Hash> {
499    if u64::try_from(data.len()).unwrap_or(u64::MAX) <= CHUNK_THRESHOLD {
500        let blob = Object::Blob(crate::object::Blob {
501            data: data.to_vec(),
502        });
503        let bytes = serialize::serialize(&blob)?;
504        return Ok(store.write(&bytes)?);
505    }
506
507    // Large file: split with FastCDC v1 via the public ChunkIterator,
508    // store each chunk as a Blob, and assemble a ChunkedBlob manifest.
509    // Per-manifest chunk count is bounded by serialize::MAX_CHUNKS
510    // (1_000_000); MAX_FILE_BYTES (1 GiB) ÷ FastCDC MIN_SIZE (16 KiB)
511    // = ~65k, well under the cap.
512    let total_size = data.len() as u64;
513    let chunks: Vec<Hash> = ChunkIterator::new(FastCdc::v1(), data)
514        .map(|b| {
515            let chunk_blob = Object::Blob(crate::object::Blob {
516                data: data[b.offset..b.offset + b.length].to_vec(),
517            });
518            let chunk_bytes = serialize::serialize(&chunk_blob)?;
519            Ok::<_, WorktreeError>(store.write(&chunk_bytes)?)
520        })
521        .collect::<Result<_, _>>()?;
522
523    let manifest = Object::ChunkedBlob(ChunkedBlob {
524        total_size,
525        chunk_size: 0, // 0 = content-defined (FastCDC) per SPEC-OBJECTS §7
526        chunks,
527    });
528    let manifest_bytes = serialize::serialize(&manifest)?;
529    Ok(store.write(&manifest_bytes)?)
530}
531
532/// Reassemble the full byte content of a `Blob` or `ChunkedBlob` object
533/// addressed by `hash`.
534///
535/// A plain [`Blob`](crate::object::Blob) returns its bytes directly. A
536/// [`ChunkedBlob`] manifest is reassembled
537/// by concatenating each referenced chunk (every chunk must itself be a
538/// `Blob`). This is the shared counterpart to [`store_file_object`] and
539/// backs `mkit cat`, `mkit diff`, conflict rendering, and blame so they
540/// all reconstruct large-file content the same way.
541///
542/// # Errors
543/// - [`WorktreeError::Store`] if `hash` or any chunk is missing.
544/// - [`WorktreeError::Io`] if `hash` (or a chunk) resolves to an object
545///   that is neither a `Blob` nor a `ChunkedBlob` of `Blob`s.
546pub fn read_blob(store: &ObjectStore, hash: &Hash) -> WorktreeResult<Vec<u8>> {
547    match store.read_object(hash)? {
548        Object::Blob(b) => Ok(b.data),
549        Object::ChunkedBlob(manifest) => {
550            let mut data = Vec::with_capacity(usize::try_from(manifest.total_size).unwrap_or(0));
551            for chunk in &manifest.chunks {
552                match store.read_object(chunk)? {
553                    Object::Blob(b) => data.extend_from_slice(&b.data),
554                    other => {
555                        return Err(WorktreeError::Io(io::Error::other(format!(
556                            "chunk {} is not a blob (got {})",
557                            crate::hash::to_hex(chunk),
558                            other.object_type().name()
559                        ))));
560                    }
561                }
562            }
563            Ok(data)
564        }
565        other => Err(WorktreeError::Io(io::Error::other(format!(
566            "object {} is not a blob (got {})",
567            crate::hash::to_hex(hash),
568            other.object_type().name()
569        )))),
570    }
571}
572
573#[cfg(unix)]
574fn open_regular_file(path: &Path) -> io::Result<fs::File> {
575    use std::os::unix::fs::OpenOptionsExt;
576
577    fs::OpenOptions::new()
578        .read(true)
579        .custom_flags(libc::O_NOFOLLOW)
580        .open(path)
581}
582
/// Open `path` for reading on platforms without the Unix `O_NOFOLLOW` path.
///
/// The path is inspected without following symlinks first and anything that
/// is not a regular file is refused.
#[cfg(not(unix))]
fn open_regular_file(path: &Path) -> io::Result<fs::File> {
    // Best-effort direct-symlink rejection. This does not close the swap
    // race between the check and the open, but it keeps normal symlinks
    // from being treated as regular files.
    if path.symlink_metadata()?.file_type().is_file() {
        fs::File::open(path)
    } else {
        Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "path is not a regular file",
        ))
    }
}
597
598#[cfg(unix)]
599fn entry_mode_from_file_metadata(meta: &fs::Metadata) -> EntryMode {
600    use std::os::unix::fs::PermissionsExt;
601
602    if meta.permissions().mode() & 0o111 != 0 {
603        EntryMode::Executable
604    } else {
605        EntryMode::Blob
606    }
607}
608
/// Non-Unix fallback: the metadata is not consulted and every regular file
/// is recorded as [`EntryMode::Blob`].
#[cfg(not(unix))]
fn entry_mode_from_file_metadata(_meta: &fs::Metadata) -> EntryMode {
    EntryMode::Blob
}
613
614#[cfg(test)]
615mod tests {
616    use super::*;
617    use crate::object::ObjectType;
618    use tempfile::TempDir;
619
620    fn fresh_store() -> (TempDir, ObjectStore) {
621        let dir = TempDir::new().unwrap();
622        let store = ObjectStore::init(dir.path()).unwrap();
623        (dir, store)
624    }
625
626    #[test]
627    fn validate_symlink_targets() {
628        assert!(validate_symlink_target("hello"));
629        assert!(validate_symlink_target("sub/dir/file"));
630        assert!(!validate_symlink_target(""));
631        assert!(!validate_symlink_target("/etc/passwd"));
632        assert!(!validate_symlink_target("../escape"));
633        assert!(!validate_symlink_target("a/../b"));
634    }
635
636    #[test]
637    fn build_tree_from_empty_dir() {
638        let (_sd, store) = fresh_store();
639        let work = TempDir::new().unwrap();
640        let h = build_tree(&store, work.path()).unwrap();
641        let obj = store.read_object(&h).unwrap();
642        match obj {
643            Object::Tree(t) => assert_eq!(t.entries.len(), 0),
644            other => panic!("expected tree, got {other:?}"),
645        }
646    }
647
648    #[test]
649    fn build_tree_with_single_file() {
650        let (_sd, store) = fresh_store();
651        let work = TempDir::new().unwrap();
652        fs::write(work.path().join("hello.txt"), b"hello world").unwrap();
653        let h = build_tree(&store, work.path()).unwrap();
654        let obj = store.read_object(&h).unwrap();
655        let Object::Tree(t) = obj else {
656            panic!("expected tree");
657        };
658        assert_eq!(t.entries.len(), 1);
659        assert_eq!(t.entries[0].name.as_slice(), b"hello.txt");
660        assert_eq!(t.entries[0].mode, EntryMode::Blob);
661        let blob_obj = store.read_object(&t.entries[0].object_hash).unwrap();
662        let Object::Blob(b) = blob_obj else {
663            panic!("expected blob");
664        };
665        assert_eq!(b.data, b"hello world");
666    }
667
668    #[cfg(unix)]
669    #[test]
670    fn build_tree_marks_executable_regular_files() {
671        use std::os::unix::fs::PermissionsExt;
672
673        let (_sd, store) = fresh_store();
674        let work = TempDir::new().unwrap();
675        let script = work.path().join("run.sh");
676        fs::write(&script, b"#!/bin/sh\n").unwrap();
677        let mut perms = fs::metadata(&script).unwrap().permissions();
678        perms.set_mode(perms.mode() | 0o111);
679        fs::set_permissions(&script, perms).unwrap();
680
681        let h = build_tree(&store, work.path()).unwrap();
682        let Object::Tree(t) = store.read_object(&h).unwrap() else {
683            panic!("expected tree");
684        };
685        assert_eq!(t.entries[0].name.as_slice(), b"run.sh");
686        assert_eq!(t.entries[0].mode, EntryMode::Executable);
687    }
688
689    #[cfg(unix)]
690    #[test]
691    fn build_tree_rejects_invalid_entry_name_before_writing_tree() {
692        let (_sd, store) = fresh_store();
693        let work = TempDir::new().unwrap();
694        fs::write(work.path().join("bad."), b"bad name").unwrap();
695
696        let err = build_tree(&store, work.path()).unwrap_err();
697        assert!(matches!(err, WorktreeError::Io(_)));
698    }
699
700    #[cfg(unix)]
701    #[test]
702    fn hash_file_rejects_final_component_symlink() {
703        use std::os::unix::fs::symlink;
704
705        let (_sd, store) = fresh_store();
706        let work = TempDir::new().unwrap();
707        fs::write(work.path().join("target.txt"), b"target").unwrap();
708        symlink("target.txt", work.path().join("link.txt")).unwrap();
709
710        let err = hash_file(&store, &work.path().join("link.txt")).unwrap_err();
711        assert!(matches!(err, WorktreeError::Io(_)));
712    }
713
714    #[test]
715    fn build_tree_with_nested_directories() {
716        let (_sd, store) = fresh_store();
717        let work = TempDir::new().unwrap();
718        fs::write(work.path().join("a.txt"), b"file a").unwrap();
719        fs::create_dir(work.path().join("subdir")).unwrap();
720        fs::write(work.path().join("subdir/b.txt"), b"file b").unwrap();
721        let h = build_tree(&store, work.path()).unwrap();
722        let obj = store.read_object(&h).unwrap();
723        let Object::Tree(t) = obj else {
724            panic!("expected tree");
725        };
726        assert_eq!(t.entries.len(), 2);
727        // Sorted lex: a.txt first, subdir second.
728        assert_eq!(t.entries[0].name.as_slice(), b"a.txt");
729        assert_eq!(t.entries[1].name.as_slice(), b"subdir");
730        assert_eq!(t.entries[1].mode, EntryMode::Tree);
731        let sub = store.read_object(&t.entries[1].object_hash).unwrap();
732        let Object::Tree(st) = sub else {
733            panic!("expected tree");
734        };
735        assert_eq!(st.entries.len(), 1);
736        assert_eq!(st.entries[0].name.as_slice(), b"b.txt");
737    }
738
739    #[test]
740    fn build_tree_skips_mkit_directory() {
741        let (_sd, store) = fresh_store();
742        let work = TempDir::new().unwrap();
743        fs::create_dir(work.path().join(".mkit")).unwrap();
744        fs::write(work.path().join(".mkit/should_skip"), b"").unwrap();
745        fs::write(work.path().join("keep.txt"), b"kept").unwrap();
746        let h = build_tree(&store, work.path()).unwrap();
747        let obj = store.read_object(&h).unwrap();
748        let Object::Tree(t) = obj else {
749            panic!("expected tree");
750        };
751        assert_eq!(t.entries.len(), 1);
752        assert_eq!(t.entries[0].name.as_slice(), b"keep.txt");
753    }
754
755    #[test]
756    fn build_tree_is_deterministic() {
757        let (_sd, store) = fresh_store();
758        let work = TempDir::new().unwrap();
759        fs::write(work.path().join("z.txt"), b"z").unwrap();
760        fs::write(work.path().join("a.txt"), b"a").unwrap();
761        let h1 = build_tree(&store, work.path()).unwrap();
762        let h2 = build_tree(&store, work.path()).unwrap();
763        assert_eq!(h1, h2);
764    }
765
766    #[test]
767    fn build_tree_respects_mkitignore() {
768        let (_sd, store) = fresh_store();
769        let work = TempDir::new().unwrap();
770        fs::write(work.path().join(".mkitignore"), b"*.log\n").unwrap();
771        fs::write(work.path().join("keep.txt"), b"kept").unwrap();
772        fs::write(work.path().join("debug.log"), b"ignored").unwrap();
773        let h = build_tree(&store, work.path()).unwrap();
774        let obj = store.read_object(&h).unwrap();
775        let Object::Tree(t) = obj else {
776            panic!("expected tree");
777        };
778        // .mkitignore + keep.txt, but not debug.log.
779        assert_eq!(t.entries.len(), 2);
780        assert_eq!(t.entries[0].name.as_slice(), b".mkitignore");
781        assert_eq!(t.entries[1].name.as_slice(), b"keep.txt");
782    }
783
784    #[cfg(unix)]
785    #[test]
786    fn rejects_invalid_symlink_targets() {
787        use std::os::unix::fs::symlink;
788        let (_sd, store) = fresh_store();
789        let work = TempDir::new().unwrap();
790        symlink("/etc/passwd", work.path().join("bad-link")).unwrap();
791        let err = build_tree(&store, work.path()).unwrap_err();
792        assert!(matches!(err, WorktreeError::InvalidSymlinkTarget(_)));
793    }
794
795    #[cfg(unix)]
796    #[test]
797    fn rejects_dotdot_symlink_targets() {
798        use std::os::unix::fs::symlink;
799        let (_sd, store) = fresh_store();
800        let work = TempDir::new().unwrap();
801        symlink("../../etc/passwd", work.path().join("bad-link")).unwrap();
802        let err = build_tree(&store, work.path()).unwrap_err();
803        assert!(matches!(err, WorktreeError::InvalidSymlinkTarget(_)));
804    }
805
806    #[test]
807    fn small_file_stays_as_regular_blob() {
808        let (_sd, store) = fresh_store();
809        let work = TempDir::new().unwrap();
810        fs::write(work.path().join("small.txt"), b"hello world").unwrap();
811        let h = build_tree(&store, work.path()).unwrap();
812        let obj = store.read_object(&h).unwrap();
813        let Object::Tree(t) = obj else {
814            panic!("expected tree");
815        };
816        let entry = store.read_object(&t.entries[0].object_hash).unwrap();
817        assert_eq!(entry.object_type(), ObjectType::Blob);
818    }
819
820    #[test]
821    fn large_file_becomes_chunked_blob() {
822        // File > CHUNK_THRESHOLD should land as a ChunkedBlob manifest
823        // pointing at one Blob per FastCDC chunk. We pseudo-randomize
824        // the buffer so FastCDC sees real boundary candidates instead
825        // of running the entire file as one max-sized chunk.
826        let (_sd, store) = fresh_store();
827        let work = TempDir::new().unwrap();
828        let n = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;
829        let mut big = Vec::with_capacity(n);
830        let mut state: u64 = 0x00C0_FFEE;
831        for _ in 0..n {
832            // splitmix64-ish; same construction as the gear table seed.
833            state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
834            let mut z = state;
835            z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
836            z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
837            z ^= z >> 31;
838            big.push((z & 0xFF) as u8);
839        }
840        fs::write(work.path().join("big.bin"), &big).unwrap();
841
842        let tree_hash = build_tree(&store, work.path()).unwrap();
843        let Object::Tree(t) = store.read_object(&tree_hash).unwrap() else {
844            panic!("expected tree");
845        };
846        assert_eq!(t.entries.len(), 1);
847
848        let entry_hash = t.entries[0].object_hash;
849        let entry = store.read_object(&entry_hash).unwrap();
850        let Object::ChunkedBlob(manifest) = entry else {
851            panic!("expected chunked_blob, got {entry:?}");
852        };
853
854        assert_eq!(manifest.total_size, n as u64);
855        assert_eq!(manifest.chunk_size, 0, "0 = content-defined (FastCDC)");
856        assert!(!manifest.chunks.is_empty());
857        // Every chunk hash must resolve to a Blob in the store, and
858        // the concatenation must reproduce the original file bytes.
859        let mut reassembled: Vec<u8> = Vec::with_capacity(n);
860        for h in &manifest.chunks {
861            let Object::Blob(b) = store.read_object(h).unwrap() else {
862                panic!("chunk did not resolve to a Blob");
863            };
864            reassembled.extend_from_slice(&b.data);
865        }
866        assert_eq!(reassembled, big, "chunks must round-trip the source");
867    }
868
869    // ---- build_tree_from_index — the staging-area path -------------
870
871    use crate::index::{EntryStatus, Index, IndexEntry};
872
873    fn write_blob(store: &ObjectStore, bytes: &[u8]) -> Hash {
874        let blob = Object::Blob(crate::object::Blob {
875            data: bytes.to_vec(),
876        });
877        let body = serialize::serialize(&blob).unwrap();
878        store.write(&body).unwrap()
879    }
880
881    #[test]
882    fn from_index_empty_returns_empty_tree() {
883        let (_sd, store) = fresh_store();
884        let idx = Index::new();
885        let h = build_tree_from_index(&store, &idx).unwrap();
886        let Object::Tree(t) = store.read_object(&h).unwrap() else {
887            panic!("expected tree");
888        };
889        assert!(t.entries.is_empty());
890    }
891
892    #[test]
893    fn from_index_single_file_at_root() {
894        let (_sd, store) = fresh_store();
895        let blob_hash = write_blob(&store, b"hello world");
896        let mut idx = Index::new();
897        idx.entries.push(IndexEntry {
898            path: "hello.txt".into(),
899            status: EntryStatus::Blob,
900            object_hash: blob_hash,
901        });
902        let h = build_tree_from_index(&store, &idx).unwrap();
903        let Object::Tree(t) = store.read_object(&h).unwrap() else {
904            panic!();
905        };
906        assert_eq!(t.entries.len(), 1);
907        assert_eq!(t.entries[0].name, b"hello.txt");
908        assert_eq!(t.entries[0].mode, EntryMode::Blob);
909        assert_eq!(t.entries[0].object_hash, blob_hash);
910    }
911
912    #[test]
913    fn from_index_nested_paths_build_subtrees() {
914        let (_sd, store) = fresh_store();
915        let a = write_blob(&store, b"file a");
916        let b = write_blob(&store, b"file b");
917        let mut idx = Index::new();
918        idx.entries.push(IndexEntry {
919            path: "a.txt".into(),
920            status: EntryStatus::Blob,
921            object_hash: a,
922        });
923        idx.entries.push(IndexEntry {
924            path: "subdir/b.txt".into(),
925            status: EntryStatus::Blob,
926            object_hash: b,
927        });
928        let root_hash = build_tree_from_index(&store, &idx).unwrap();
929        let Object::Tree(root) = store.read_object(&root_hash).unwrap() else {
930            panic!();
931        };
932        assert_eq!(root.entries.len(), 2);
933        assert_eq!(root.entries[0].name, b"a.txt");
934        assert_eq!(root.entries[0].mode, EntryMode::Blob);
935        assert_eq!(root.entries[1].name, b"subdir");
936        assert_eq!(root.entries[1].mode, EntryMode::Tree);
937
938        let Object::Tree(sub) = store.read_object(&root.entries[1].object_hash).unwrap() else {
939            panic!();
940        };
941        assert_eq!(sub.entries.len(), 1);
942        assert_eq!(sub.entries[0].name, b"b.txt");
943        assert_eq!(sub.entries[0].object_hash, b);
944    }
945
946    #[test]
947    fn from_index_removed_entries_are_skipped() {
948        let (_sd, store) = fresh_store();
949        let a = write_blob(&store, b"keep me");
950        let mut idx = Index::new();
951        idx.entries.push(IndexEntry {
952            path: "keep.txt".into(),
953            status: EntryStatus::Blob,
954            object_hash: a,
955        });
956        idx.entries.push(IndexEntry {
957            path: "drop.txt".into(),
958            status: EntryStatus::Removed,
959            object_hash: [0; 32],
960        });
961        let h = build_tree_from_index(&store, &idx).unwrap();
962        let Object::Tree(t) = store.read_object(&h).unwrap() else {
963            panic!();
964        };
965        assert_eq!(t.entries.len(), 1);
966        assert_eq!(t.entries[0].name, b"keep.txt");
967    }
968
969    #[test]
970    fn from_index_executable_and_symlink_modes_pass_through() {
971        let (_sd, store) = fresh_store();
972        let exec = write_blob(&store, b"#!/bin/sh");
973        let link = write_blob(&store, b"target.txt");
974        let mut idx = Index::new();
975        idx.entries.push(IndexEntry {
976            path: "run.sh".into(),
977            status: EntryStatus::Executable,
978            object_hash: exec,
979        });
980        idx.entries.push(IndexEntry {
981            path: "link".into(),
982            status: EntryStatus::Symlink,
983            object_hash: link,
984        });
985        let h = build_tree_from_index(&store, &idx).unwrap();
986        let Object::Tree(t) = store.read_object(&h).unwrap() else {
987            panic!();
988        };
989        let by_name: std::collections::HashMap<&[u8], &TreeEntry> =
990            t.entries.iter().map(|e| (e.name.as_slice(), e)).collect();
991        assert_eq!(by_name[&b"run.sh"[..]].mode, EntryMode::Executable);
992        assert_eq!(by_name[&b"link"[..]].mode, EntryMode::Symlink);
993    }
994
995    #[test]
996    fn from_index_entries_are_sorted_by_name() {
997        let (_sd, store) = fresh_store();
998        let a = write_blob(&store, b"x");
999        let mut idx = Index::new();
1000        // Insert out-of-order; the on-disk Tree must still be sorted
1001        // (SPEC-OBJECTS §4 normative).
1002        idx.entries.push(IndexEntry {
1003            path: "z.txt".into(),
1004            status: EntryStatus::Blob,
1005            object_hash: a,
1006        });
1007        idx.entries.push(IndexEntry {
1008            path: "a.txt".into(),
1009            status: EntryStatus::Blob,
1010            object_hash: a,
1011        });
1012        idx.entries.push(IndexEntry {
1013            path: "m.txt".into(),
1014            status: EntryStatus::Blob,
1015            object_hash: a,
1016        });
1017        let h = build_tree_from_index(&store, &idx).unwrap();
1018        let Object::Tree(t) = store.read_object(&h).unwrap() else {
1019            panic!();
1020        };
1021        let names: Vec<&[u8]> = t.entries.iter().map(|e| e.name.as_slice()).collect();
1022        assert_eq!(names, vec![&b"a.txt"[..], b"m.txt", b"z.txt"]);
1023    }
1024
1025    #[test]
1026    fn from_index_rejects_trailing_slash() {
1027        let (_sd, store) = fresh_store();
1028        let h = write_blob(&store, b"x");
1029        let mut idx = Index::new();
1030        idx.entries.push(IndexEntry {
1031            path: "dir/".into(),
1032            status: EntryStatus::Blob,
1033            object_hash: h,
1034        });
1035        let err = build_tree_from_index(&store, &idx).unwrap_err();
1036        assert!(matches!(err, WorktreeError::Io(_)));
1037    }
1038
1039    #[test]
1040    fn from_index_rejects_empty_segment() {
1041        let (_sd, store) = fresh_store();
1042        let h = write_blob(&store, b"x");
1043        let mut idx = Index::new();
1044        idx.entries.push(IndexEntry {
1045            path: "a//b.txt".into(),
1046            status: EntryStatus::Blob,
1047            object_hash: h,
1048        });
1049        let err = build_tree_from_index(&store, &idx).unwrap_err();
1050        assert!(matches!(err, WorktreeError::Io(_)));
1051    }
1052
1053    #[test]
1054    fn from_index_rejects_reserved_name() {
1055        let (_sd, store) = fresh_store();
1056        let h = write_blob(&store, b"x");
1057        let mut idx = Index::new();
1058        // ".mkit" is rejected by TreeEntry::validate_name as repo
1059        // metadata aliasing.
1060        idx.entries.push(IndexEntry {
1061            path: ".mkit".into(),
1062            status: EntryStatus::Blob,
1063            object_hash: h,
1064        });
1065        let err = build_tree_from_index(&store, &idx).unwrap_err();
1066        assert!(matches!(err, WorktreeError::Io(_)));
1067    }
1068
1069    /// The most important invariant: for a worktree whose contents
1070    /// match the index entry-for-entry, `build_tree` and
1071    /// `build_tree_from_index` MUST produce the identical root hash.
1072    /// If this drifts, attestations signed under one path won't
1073    /// verify against trees built under the other.
1074    #[test]
1075    fn from_index_matches_build_tree_for_equivalent_worktree() {
1076        let (_sd, store) = fresh_store();
1077
1078        // Build the same content two ways:
1079        //   1. drop files on disk, call build_tree.
1080        //   2. write blobs to the store directly, populate an index,
1081        //      call build_tree_from_index.
1082        let work = TempDir::new().unwrap();
1083        fs::write(work.path().join("a.txt"), b"alpha").unwrap();
1084        fs::create_dir(work.path().join("dir")).unwrap();
1085        fs::write(work.path().join("dir/b.txt"), b"beta").unwrap();
1086        fs::write(work.path().join("dir/c.txt"), b"gamma").unwrap();
1087        let worktree_root = build_tree(&store, work.path()).unwrap();
1088
1089        let a = write_blob(&store, b"alpha");
1090        let b = write_blob(&store, b"beta");
1091        let c = write_blob(&store, b"gamma");
1092        let mut idx = Index::new();
1093        idx.entries.push(IndexEntry {
1094            path: "a.txt".into(),
1095            status: EntryStatus::Blob,
1096            object_hash: a,
1097        });
1098        idx.entries.push(IndexEntry {
1099            path: "dir/b.txt".into(),
1100            status: EntryStatus::Blob,
1101            object_hash: b,
1102        });
1103        idx.entries.push(IndexEntry {
1104            path: "dir/c.txt".into(),
1105            status: EntryStatus::Blob,
1106            object_hash: c,
1107        });
1108        let index_root = build_tree_from_index(&store, &idx).unwrap();
1109
1110        assert_eq!(
1111            worktree_root, index_root,
1112            "build_tree_from_index must produce the same root hash as build_tree for equivalent contents"
1113        );
1114    }
1115
1116    #[test]
1117    fn from_index_deeply_nested_paths_build_chain_of_subtrees() {
1118        let (_sd, store) = fresh_store();
1119        let h = write_blob(&store, b"deep");
1120        let mut idx = Index::new();
1121        idx.entries.push(IndexEntry {
1122            path: "a/b/c/d/e.txt".into(),
1123            status: EntryStatus::Blob,
1124            object_hash: h,
1125        });
1126        let root = build_tree_from_index(&store, &idx).unwrap();
1127        let Object::Tree(t) = store.read_object(&root).unwrap() else {
1128            panic!();
1129        };
1130        assert_eq!(t.entries.len(), 1);
1131        assert_eq!(t.entries[0].name, b"a");
1132        assert_eq!(t.entries[0].mode, EntryMode::Tree);
1133        // Walk down to the leaf.
1134        let mut cursor = t.entries[0].object_hash;
1135        for seg in [b"b" as &[u8], b"c", b"d"] {
1136            let Object::Tree(t) = store.read_object(&cursor).unwrap() else {
1137                panic!();
1138            };
1139            assert_eq!(t.entries.len(), 1);
1140            assert_eq!(t.entries[0].name, seg);
1141            cursor = t.entries[0].object_hash;
1142        }
1143        let Object::Tree(t) = store.read_object(&cursor).unwrap() else {
1144            panic!();
1145        };
1146        assert_eq!(t.entries[0].name, b"e.txt");
1147        assert_eq!(t.entries[0].object_hash, h);
1148    }
1149
1150    /// Path-collision: an index that stakes the same name as both a
1151    /// blob and a directory MUST be rejected. Without the check the
1152    /// builder would happily emit two `TreeEntries` with name `a`
1153    /// (one Blob, one Tree), which the deserializer rejects under
1154    /// its strict ascending-name rule. We catch it earlier with a
1155    /// clearer error so the user knows which path needs unstaging.
1156    /// (Reviewer finding 2 on PR #103.)
1157    #[test]
1158    fn from_index_rejects_blob_then_subdir_collision() {
1159        let (_sd, store) = fresh_store();
1160        let h = write_blob(&store, b"x");
1161        let mut idx = Index::new();
1162        idx.entries.push(IndexEntry {
1163            path: "a".into(),
1164            status: EntryStatus::Blob,
1165            object_hash: h,
1166        });
1167        idx.entries.push(IndexEntry {
1168            path: "a/b".into(),
1169            status: EntryStatus::Blob,
1170            object_hash: h,
1171        });
1172        let err = build_tree_from_index(&store, &idx).unwrap_err();
1173        let msg = format!("{err}");
1174        assert!(
1175            msg.contains("conflict") || msg.contains("collision") || msg.contains("'a'"),
1176            "expected collision error mentioning the path, got: {msg}"
1177        );
1178    }
1179
1180    /// Same collision in the opposite stage order: subdir entry
1181    /// staged first, then a blob at the parent.
1182    #[test]
1183    fn from_index_rejects_subdir_then_blob_collision() {
1184        let (_sd, store) = fresh_store();
1185        let h = write_blob(&store, b"x");
1186        let mut idx = Index::new();
1187        idx.entries.push(IndexEntry {
1188            path: "a/b".into(),
1189            status: EntryStatus::Blob,
1190            object_hash: h,
1191        });
1192        idx.entries.push(IndexEntry {
1193            path: "a".into(),
1194            status: EntryStatus::Blob,
1195            object_hash: h,
1196        });
1197        assert!(build_tree_from_index(&store, &idx).is_err());
1198    }
1199
1200    #[test]
1201    fn from_index_rejects_duplicate_exact_path() {
1202        let (_sd, store) = fresh_store();
1203        let a = write_blob(&store, b"a");
1204        let b = write_blob(&store, b"b");
1205        let mut idx = Index::new();
1206        idx.entries.push(IndexEntry {
1207            path: "same.txt".into(),
1208            status: EntryStatus::Blob,
1209            object_hash: a,
1210        });
1211        idx.entries.push(IndexEntry {
1212            path: "same.txt".into(),
1213            status: EntryStatus::Blob,
1214            object_hash: b,
1215        });
1216
1217        let err = build_tree_from_index(&store, &idx).unwrap_err();
1218        let msg = format!("{err}");
1219        assert!(msg.contains("duplicate index path"), "got: {msg}");
1220    }
1221
1222    #[test]
1223    fn from_index_rejects_duplicate_removed_and_live_path() {
1224        let (_sd, store) = fresh_store();
1225        let h = write_blob(&store, b"live");
1226        let mut idx = Index::new();
1227        idx.entries.push(IndexEntry {
1228            path: "same.txt".into(),
1229            status: EntryStatus::Removed,
1230            object_hash: [0; 32],
1231        });
1232        idx.entries.push(IndexEntry {
1233            path: "same.txt".into(),
1234            status: EntryStatus::Blob,
1235            object_hash: h,
1236        });
1237
1238        let err = build_tree_from_index(&store, &idx).unwrap_err();
1239        let msg = format!("{err}");
1240        assert!(msg.contains("duplicate index path"), "got: {msg}");
1241    }
1242
1243    /// All-Removed index → empty root tree, NOT an error.
1244    /// (Reviewer finding 1 on PR #103.) `staged_count()` excludes
1245    /// Removed entries by design; the tree builder does too. The
1246    /// resulting empty tree is a valid commit target — applying a
1247    /// removals-only changeset to a tree that previously contained
1248    /// those paths produces an empty root.
1249    #[test]
1250    fn from_index_all_removed_produces_empty_tree() {
1251        let (_sd, store) = fresh_store();
1252        let mut idx = Index::new();
1253        idx.entries.push(IndexEntry {
1254            path: "gone.txt".into(),
1255            status: EntryStatus::Removed,
1256            object_hash: [0; 32],
1257        });
1258        let h = build_tree_from_index(&store, &idx).unwrap();
1259        let Object::Tree(t) = store.read_object(&h).unwrap() else {
1260            panic!();
1261        };
1262        assert!(t.entries.is_empty());
1263    }
1264
1265    /// Sanity: `ObjectType::Tree` is what we materialise. Pin so a
1266    /// future enum reshuffle catches us.
1267    #[test]
1268    fn from_index_root_is_a_tree_object() {
1269        let (_sd, store) = fresh_store();
1270        let idx = Index::new();
1271        let h = build_tree_from_index(&store, &idx).unwrap();
1272        let obj = store.read_object(&h).unwrap();
1273        assert_eq!(obj.object_type(), ObjectType::Tree);
1274    }
1275
1276    #[test]
1277    fn from_index_rejects_missing_blob_object() {
1278        let (_sd, store) = fresh_store();
1279        let mut idx = Index::new();
1280        idx.entries.push(IndexEntry {
1281            path: "missing.txt".into(),
1282            status: EntryStatus::Blob,
1283            object_hash: [42; 32],
1284        });
1285
1286        let err = build_tree_from_index(&store, &idx).unwrap_err();
1287        assert!(matches!(err, WorktreeError::Store(_)));
1288    }
1289
1290    #[test]
1291    fn from_index_rejects_non_blob_object_for_blob_status() {
1292        let (_sd, store) = fresh_store();
1293        let tree = Object::Tree(Tree { entries: vec![] });
1294        let body = serialize::serialize(&tree).unwrap();
1295        let tree_hash = store.write(&body).unwrap();
1296        let mut idx = Index::new();
1297        idx.entries.push(IndexEntry {
1298            path: "not-a-blob.txt".into(),
1299            status: EntryStatus::Blob,
1300            object_hash: tree_hash,
1301        });
1302
1303        let err = build_tree_from_index(&store, &idx).unwrap_err();
1304        let msg = format!("{err}");
1305        assert!(
1306            msg.contains("non-blob"),
1307            "expected non-blob index object error, got: {msg}"
1308        );
1309    }
1310
1311    /// A file entry whose object is a `ChunkedBlob` (the canonical
1312    /// representation for > `CHUNK_THRESHOLD` content) is accepted by the
1313    /// commit/index tree builder, NOT rejected as "non-blob" (#203). The
1314    /// resulting tree carries an `EntryMode::Blob` pointing at the
1315    /// manifest, exactly as `build_tree` produces for a large worktree
1316    /// file.
1317    #[test]
1318    fn from_index_accepts_chunked_blob_for_file_entry() {
1319        let (_sd, store) = fresh_store();
1320        // Build a > CHUNK_THRESHOLD file's content and store it via the
1321        // shared object path (lands as a ChunkedBlob).
1322        let n = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;
1323        let mut big = Vec::with_capacity(n);
1324        let mut state: u64 = 0x00C0_FFEE;
1325        for _ in 0..n {
1326            state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
1327            let mut z = state;
1328            z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
1329            z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
1330            z ^= z >> 31;
1331            big.push((z & 0xFF) as u8);
1332        }
1333        let chunked_hash = store_file_object(&store, &big).unwrap();
1334        assert!(
1335            matches!(
1336                store.read_object(&chunked_hash).unwrap(),
1337                Object::ChunkedBlob(_)
1338            ),
1339            "fixture must be a ChunkedBlob"
1340        );
1341
1342        let mut idx = Index::new();
1343        idx.entries.push(IndexEntry {
1344            path: "big.bin".into(),
1345            status: EntryStatus::Blob,
1346            object_hash: chunked_hash,
1347        });
1348        let root = build_tree_from_index(&store, &idx).unwrap();
1349        let Object::Tree(t) = store.read_object(&root).unwrap() else {
1350            panic!("expected tree");
1351        };
1352        assert_eq!(t.entries.len(), 1);
1353        assert_eq!(t.entries[0].name, b"big.bin");
1354        assert_eq!(t.entries[0].mode, EntryMode::Blob);
1355        assert_eq!(t.entries[0].object_hash, chunked_hash);
1356        // Reassembly via the shared helper round-trips the source bytes.
1357        assert_eq!(read_blob(&store, &chunked_hash).unwrap(), big);
1358    }
1359
1360    /// A symlink entry MUST still address a single `Blob` (its target
1361    /// path); a `ChunkedBlob` under a symlink entry is rejected.
1362    #[test]
1363    fn from_index_rejects_chunked_blob_for_symlink_entry() {
1364        let (_sd, store) = fresh_store();
1365        let n = usize::try_from(CHUNK_THRESHOLD).unwrap() + 256 * 1024;
1366        let big = vec![0xABu8; n];
1367        let chunked_hash = store_file_object(&store, &big).unwrap();
1368        let mut idx = Index::new();
1369        idx.entries.push(IndexEntry {
1370            path: "link".into(),
1371            status: EntryStatus::Symlink,
1372            object_hash: chunked_hash,
1373        });
1374        let err = build_tree_from_index(&store, &idx).unwrap_err();
1375        assert!(format!("{err}").contains("non-blob"));
1376    }
1377}