Skip to main content

repo/
repository_tree.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Tree building and materialization helpers.
3
4use std::{collections::HashSet, fs, path::Path, time::Instant};
5
6use objects::{
7    object::{Blob, ContentHash, Tree, TreeEntry},
8    store::ObjectStore,
9    util::gitlink_placeholder_bytes,
10    worktree::WorktreeStatus,
11};
12use tracing::{debug, instrument, trace, warn};
13
14use super::{
15    HeddleError, Repository, Result,
16    repository_worktree_status::{WorktreeStatusDetailed, compare_worktree_with_index_detailed},
17};
18use crate::{
19    FsMonitorSettings, WorktreeIndex, WorktreeStatusOptions,
20    fsmonitor::ChangeMonitorSession,
21    worktree_ignore::WorktreeIgnoreMatcher,
22    worktree_index::{WorktreeIndexLoadStats, WorktreeIndexSaveStats},
23    worktree_walk::{
24        WalkDirectory, WalkEntry, WorktreeWalkPolicy, read_blob_with_hash, validate_symlink_target,
25        walk_worktree,
26    },
27};
28
/// Timing and counter breakdown for one cached worktree comparison.
///
/// Populated by
/// `Repository::compare_worktree_cached_detailed_profiled_with_options`;
/// the two `untracked_flatten*` fields are only filled in by
/// `Repository::compare_worktree_cached_profiled_with_options` and stay
/// zero otherwise. All `*_ms` fields are wall-clock milliseconds.
#[derive(Debug, Clone, Default)]
pub struct WorktreeCompareProfile {
    // --- index load ---
    /// Total time spent loading the persisted worktree index (this
    /// includes the fallback to an empty index when loading fails).
    pub index_load_ms: u128,
    /// `snapshot_load_ms` as reported by the index load stats.
    pub index_snapshot_load_ms: u128,
    /// `journal_replay_ms` as reported by the index load stats.
    pub index_journal_replay_ms: u128,
    /// `snapshot_bytes` as reported by the index load stats.
    pub index_snapshot_bytes: u64,
    /// `journal_bytes` as reported by the index load stats.
    pub index_journal_bytes: u64,
    /// `journal_ops` as reported by the index load stats.
    pub index_journal_ops: usize,

    // --- monitor + compare ---
    /// Time spent preparing the change-monitor session.
    pub monitor_prepare_ms: u128,
    /// Time spent in the index-backed worktree comparison itself.
    pub compare_ms: u128,

    // --- index save ---
    /// Time spent persisting the index; zero when the index was clean
    /// or the save failed.
    pub index_save_ms: u128,
    /// `snapshot_write_ms` as reported by the index save stats.
    pub index_snapshot_write_ms: u128,
    /// `journal_append_ms` as reported by the index save stats.
    pub index_journal_append_ms: u128,
    /// `snapshot_bytes` as reported by the index save stats.
    pub index_save_snapshot_bytes: u64,
    /// `journal_bytes` as reported by the index save stats.
    pub index_save_journal_bytes: u64,
    /// `journal_ops` as reported by the index save stats.
    pub index_save_journal_ops: usize,
    /// `compacted` flag as reported by the index save stats.
    pub index_save_compacted: bool,

    // --- post-processing ---
    /// Time spent persisting the change-monitor state.
    pub monitor_persist_ms: u128,
    /// Time spent flattening the detailed status into a flat status.
    pub untracked_flatten_ms: u128,
    /// Flattened untracked path count reported by the detailed status.
    pub untracked_flattened_paths: usize,

    // --- counters copied from the comparison stats ---
    /// `tracked_refresh_ms` from the comparison stats.
    pub tracked_refresh_ms: u128,
    /// `untracked_scan_ms` from the comparison stats.
    pub untracked_scan_ms: u128,
    /// `hashing_ms` from the comparison stats.
    pub hashing_ms: u128,
    /// `directory_cache_compare_ms` from the comparison stats.
    pub directory_cache_compare_ms: u128,
    /// `directories_scanned` from the comparison stats.
    pub directories_scanned: u64,
    /// `directories_skipped` from the comparison stats.
    pub directories_skipped: u64,
    /// `files_hashed` from the comparison stats.
    pub files_hashed: u64,
    /// `cache_hits` from the comparison stats.
    pub cache_hits: u64,
    /// `monitor_changed_paths` from the comparison stats.
    pub monitor_changed_paths: u64,
    /// `monitor_skipped_directories` from the comparison stats.
    pub monitor_skipped_directories: u64,
}
60
/// Timing and counter breakdown for one tree build.
///
/// All `*_ms` fields are wall-clock milliseconds, summed over the whole
/// walk.
#[derive(Debug, Clone, Default)]
pub struct TreeBuildProfile {
    /// Total time for the build: nested-worktree exclusion discovery
    /// plus the walk (including the final blob flush).
    pub tree_walk_ms: u128,
    /// Time spent reading and hashing file contents.
    pub blob_prep_ms: u128,
    /// Time spent queueing blobs plus the final packed blob flush.
    pub blob_write_ms: u128,
    /// Time spent writing subtree objects to the store.
    pub tree_write_ms: u128,
    /// Number of regular-file entries processed (symlinks are not
    /// counted).
    pub file_count: usize,
    /// Number of subdirectories processed (the walk root itself is not
    /// counted).
    pub dir_count: usize,
}
70
71#[derive(Debug, Clone)]
72struct TreeBuildOutput {
73    tree: Tree,
74    profile: TreeBuildProfile,
75}
76
77impl Repository {
78    /// Build a tree from a directory.
79    #[instrument(skip(self), fields(dir = %dir.display()))]
80    pub fn build_tree(&self, dir: &Path) -> Result<Tree> {
81        self.build_tree_profiled(dir).map(|(tree, _)| tree)
82    }
83
84    /// Build a tree from a directory, reusing per-file hashes from a
85    /// thread manifest when the on-disk `(inode, mtime, ctime, mode)`
86    /// still matches the recorded snapshot.
87    ///
88    /// Same output as [`Self::build_tree`] — a complete `Tree` object —
89    /// but files whose stat fields match the cache skip the
90    /// `read + hash + put_blob` cycle entirely. Net effect on
91    /// `capture_thread_from_disk` for a single-file edit on a 643-file
92    /// fixture: blob work drops from ~30 MB of reads to ~one file's
93    /// worth. Wall-clock follows.
94    ///
95    /// Safe-by-default: any uncertainty (entry missing from cache,
96    /// stat mismatch) falls back to the full read path for that
97    /// specific file. Other files in the same tree still benefit.
98    pub fn build_tree_with_stat_cache(
99        &self,
100        dir: &Path,
101        manifest: &crate::thread_manifest::ThreadManifest,
102    ) -> Result<Tree> {
103        self.build_tree_profiled_inner(dir, None, Some(manifest))
104            .map(|(tree, _)| tree)
105    }
106
107    #[instrument(skip(self), fields(dir = %dir.display()))]
108    pub fn build_tree_profiled(&self, dir: &Path) -> Result<(Tree, TreeBuildProfile)> {
109        self.build_tree_profiled_inner(dir, None, None)
110    }
111
112    pub(crate) fn build_tree_profiled_against(
113        &self,
114        dir: &Path,
115        baseline_tree: Option<&Tree>,
116    ) -> Result<(Tree, TreeBuildProfile)> {
117        self.build_tree_profiled_inner(dir, baseline_tree, None)
118    }
119
120    /// Profiled tree-build that reuses a manifest's stat-cache. Same
121    /// contract as [`Self::build_tree_profiled`] — returns the full
122    /// `(Tree, TreeBuildProfile)` for downstream timing — but skips
123    /// the `read + hash + put_blob` cycle for files whose stat fields
124    /// match the cache. The fall-through path for changed/new files
125    /// is identical, so the resulting tree is byte-identical to what
126    /// the un-cached build would produce.
127    #[instrument(skip(self, manifest), fields(dir = %dir.display()))]
128    pub fn build_tree_profiled_with_stat_cache(
129        &self,
130        dir: &Path,
131        manifest: &crate::thread_manifest::ThreadManifest,
132    ) -> Result<(Tree, TreeBuildProfile)> {
133        self.build_tree_profiled_inner(dir, None, Some(manifest))
134    }
135
136    pub(crate) fn build_tree_profiled_with_stat_cache_against(
137        &self,
138        dir: &Path,
139        baseline_tree: Option<&Tree>,
140        manifest: &crate::thread_manifest::ThreadManifest,
141    ) -> Result<(Tree, TreeBuildProfile)> {
142        self.build_tree_profiled_inner(dir, baseline_tree, Some(manifest))
143    }
144
145    fn build_tree_profiled_inner(
146        &self,
147        dir: &Path,
148        baseline_tree: Option<&Tree>,
149        stat_cache: Option<&crate::thread_manifest::ThreadManifest>,
150    ) -> Result<(Tree, TreeBuildProfile)> {
151        let patterns = self.ignore_patterns()?;
152        debug!(pattern_count = patterns.len(), "Starting tree build");
153        let start = Instant::now();
154        let nested_exclusions = self.nested_thread_worktree_exclusions(dir)?;
155        let tree =
156            self.build_tree_walk(dir, &patterns, nested_exclusions, baseline_tree, stat_cache);
157        let elapsed = start.elapsed().as_millis();
158        debug!(duration_ms = elapsed, "Tree build complete");
159        tree.map(|output| {
160            let mut profile = output.profile;
161            profile.tree_walk_ms = elapsed;
162            (output.tree, profile)
163        })
164    }
165
166    #[instrument(skip(self, patterns, nested_exclusions, baseline_tree, stat_cache), fields(dir = %dir.display()))]
167    fn build_tree_walk(
168        &self,
169        dir: &Path,
170        patterns: &[String],
171        nested_exclusions: Vec<std::path::PathBuf>,
172        baseline_tree: Option<&Tree>,
173        stat_cache: Option<&crate::thread_manifest::ThreadManifest>,
174    ) -> Result<TreeBuildOutput> {
175        let ignore_matcher =
176            WorktreeIgnoreMatcher::new(patterns).with_nested_worktree_exclusions(nested_exclusions);
177        let mut policy = TreeBuildPolicy::new(self, dir, stat_cache);
178        let mut output = walk_worktree(self, dir, &ignore_matcher, baseline_tree, &mut policy)?;
179
180        // Flush every newly-seen blob as a single packfile. Stores
181        // that don't override `put_blobs_packed` fall back to per-blob
182        // writes (correct, just slower). Time is folded into
183        // `blob_write_ms` so the existing perf profile keeps tracking
184        // total blob-storage cost.
185        if !policy.pending_blobs.is_empty() {
186            let flush_start = Instant::now();
187            let pending = std::mem::take(&mut policy.pending_blobs);
188            self.store.put_blobs_packed(pending)?;
189            output.profile.blob_write_ms += flush_start.elapsed().as_millis();
190        }
191
192        Ok(output)
193    }
194
195    /// Compare the worktree against a tree using the persisted binary index.
196    pub fn compare_worktree_cached(&self, tree: &Tree) -> Result<WorktreeStatus> {
197        self.compare_worktree_cached_with_options(tree, &self.default_worktree_status_options())
198    }
199
200    pub fn compare_worktree_cached_detailed(&self, tree: &Tree) -> Result<WorktreeStatusDetailed> {
201        self.compare_worktree_cached_detailed_with_options(
202            tree,
203            &self.default_worktree_status_options(),
204        )
205    }
206
207    /// Compare the worktree against a tree using the persisted binary index.
208    pub fn compare_worktree_cached_with_options(
209        &self,
210        tree: &Tree,
211        options: &WorktreeStatusOptions,
212    ) -> Result<WorktreeStatus> {
213        self.compare_worktree_cached_profiled_with_options(tree, options)
214            .map(|(status, _)| status)
215    }
216
217    pub fn compare_worktree_cached_detailed_with_options(
218        &self,
219        tree: &Tree,
220        options: &WorktreeStatusOptions,
221    ) -> Result<WorktreeStatusDetailed> {
222        self.compare_worktree_cached_detailed_profiled_with_options(tree, options)
223            .map(|(status, _)| status)
224    }
225
226    pub fn compare_worktree_cached_profiled_with_options(
227        &self,
228        tree: &Tree,
229        options: &WorktreeStatusOptions,
230    ) -> Result<(WorktreeStatus, WorktreeCompareProfile)> {
231        let (detailed_status, mut profile) =
232            self.compare_worktree_cached_detailed_profiled_with_options(tree, options)?;
233        let flatten_start = Instant::now();
234        let flattened_paths = detailed_status.untracked.flattened_path_count();
235        let mut status = detailed_status.into_flat_status();
236        profile.untracked_flatten_ms = flatten_start.elapsed().as_millis();
237        profile.untracked_flattened_paths = flattened_paths;
238        status.modified.sort();
239        status.added.sort();
240        status.deleted.sort();
241        Ok((status, profile))
242    }
243
244    pub fn compare_worktree_cached_detailed_profiled_with_options(
245        &self,
246        tree: &Tree,
247        options: &WorktreeStatusOptions,
248    ) -> Result<(WorktreeStatusDetailed, WorktreeCompareProfile)> {
249        let index_path = self.worktree_index_path();
250        let load_start = Instant::now();
251        let (mut index, load_stats) = match WorktreeIndex::load_profiled(&index_path) {
252            Ok(result) => result,
253            Err(error) => {
254                warn!(path = %index_path.display(), %error, "Ignoring unreadable worktree index");
255                (WorktreeIndex::new(), WorktreeIndexLoadStats::default())
256            }
257        };
258        let index_load_ms = load_start.elapsed().as_millis();
259
260        let monitor_prepare_start = Instant::now();
261        let monitor = ChangeMonitorSession::prepare(self.root(), options.fsmonitor);
262        let monitor_prepare_ms = monitor_prepare_start.elapsed().as_millis();
263
264        let patterns = self.ignore_patterns()?;
265        let nested_exclusions = self.nested_thread_worktree_exclusions(self.root())?;
266        let ignore_matcher = WorktreeIgnoreMatcher::new(&patterns)
267            .with_nested_worktree_exclusions(nested_exclusions);
268        let compare_start = Instant::now();
269        let (status, stats) = compare_worktree_with_index_detailed(
270            self,
271            tree,
272            &ignore_matcher,
273            &mut index,
274            &monitor,
275        )?;
276        let compare_ms = compare_start.elapsed().as_millis();
277
278        let save_start = Instant::now();
279        let (index_save_ms, save_stats) = if index.is_dirty() {
280            match index.save_profiled(&index_path) {
281                Ok(stats) => {
282                    index.mark_clean();
283                    (save_start.elapsed().as_millis(), stats)
284                }
285                Err(error) => {
286                    warn!(path = %index_path.display(), %error, "Failed to persist worktree index");
287                    (0, WorktreeIndexSaveStats::default())
288                }
289            }
290        } else {
291            (0, WorktreeIndexSaveStats::default())
292        };
293
294        let persist_start = Instant::now();
295        if let Err(error) = monitor.persist() {
296            warn!(path = %self.root().display(), %error, "Failed to persist monitor state");
297        }
298        let monitor_persist_ms = persist_start.elapsed().as_millis();
299
300        debug!(
301            index_load_ms,
302            index_snapshot_load_ms = load_stats.snapshot_load_ms,
303            index_journal_replay_ms = load_stats.journal_replay_ms,
304            index_snapshot_bytes = load_stats.snapshot_bytes,
305            index_journal_bytes = load_stats.journal_bytes,
306            index_journal_ops = load_stats.journal_ops,
307            monitor_prepare_ms,
308            compare_ms,
309            index_save_ms,
310            index_snapshot_write_ms = save_stats.snapshot_write_ms,
311            index_journal_append_ms = save_stats.journal_append_ms,
312            index_save_snapshot_bytes = save_stats.snapshot_bytes,
313            index_save_journal_bytes = save_stats.journal_bytes,
314            index_save_journal_ops = save_stats.journal_ops,
315            index_save_compacted = save_stats.compacted,
316            index_save_compact_reason = save_stats.compact_reason.unwrap_or("none"),
317            monitor_persist_ms,
318            tracked_refresh_ms = stats.tracked_refresh_ms,
319            untracked_scan_ms = stats.untracked_scan_ms,
320            untracked_flatten_ms = 0,
321            untracked_flattened_paths = 0,
322            hashing_ms = stats.hashing_ms,
323            directory_cache_compare_ms = stats.directory_cache_compare_ms,
324            directories_scanned = stats.directories_scanned,
325            directories_skipped = stats.directories_skipped,
326            files_hashed = stats.files_hashed,
327            cache_hits = stats.cache_hits,
328            monitor_backend = monitor.backend.unwrap_or("off"),
329            monitor_status = ?monitor.status,
330            monitor_reason = monitor.reason.as_deref().unwrap_or("ready"),
331            monitor_changed_paths = stats.monitor_changed_paths,
332            monitor_skipped_directories = stats.monitor_skipped_directories,
333            "Worktree compare complete"
334        );
335
336        Ok((
337            status,
338            WorktreeCompareProfile {
339                index_load_ms,
340                index_snapshot_load_ms: load_stats.snapshot_load_ms,
341                index_journal_replay_ms: load_stats.journal_replay_ms,
342                index_snapshot_bytes: load_stats.snapshot_bytes,
343                index_journal_bytes: load_stats.journal_bytes,
344                index_journal_ops: load_stats.journal_ops,
345                monitor_prepare_ms,
346                compare_ms,
347                index_save_ms,
348                index_snapshot_write_ms: save_stats.snapshot_write_ms,
349                index_journal_append_ms: save_stats.journal_append_ms,
350                index_save_snapshot_bytes: save_stats.snapshot_bytes,
351                index_save_journal_bytes: save_stats.journal_bytes,
352                index_save_journal_ops: save_stats.journal_ops,
353                index_save_compacted: save_stats.compacted,
354                monitor_persist_ms,
355                untracked_flatten_ms: 0,
356                untracked_flattened_paths: 0,
357                tracked_refresh_ms: stats.tracked_refresh_ms,
358                untracked_scan_ms: stats.untracked_scan_ms,
359                hashing_ms: stats.hashing_ms,
360                directory_cache_compare_ms: stats.directory_cache_compare_ms,
361                directories_scanned: stats.directories_scanned,
362                directories_skipped: stats.directories_skipped,
363                files_hashed: stats.files_hashed,
364                cache_hits: stats.cache_hits,
365                monitor_changed_paths: stats.monitor_changed_paths,
366                monitor_skipped_directories: stats.monitor_skipped_directories,
367            },
368        ))
369    }
370
371    /// Return whether the worktree matches the provided tree.
372    pub fn worktree_is_clean_cached(&self, tree: &Tree) -> Result<bool> {
373        self.worktree_is_clean_cached_with_options(tree, &self.default_worktree_status_options())
374    }
375
376    /// Return whether the worktree matches the provided tree.
377    pub fn worktree_is_clean_cached_with_options(
378        &self,
379        tree: &Tree,
380        options: &WorktreeStatusOptions,
381    ) -> Result<bool> {
382        Ok(self
383            .compare_worktree_cached_detailed_with_options(tree, options)?
384            .is_clean())
385    }
386
387    fn worktree_index_path(&self) -> std::path::PathBuf {
388        self.root.join(".heddle/state").join("index.bin")
389    }
390
391    fn default_worktree_status_options(&self) -> WorktreeStatusOptions {
392        WorktreeStatusOptions {
393            fsmonitor: FsMonitorSettings::from(self.config.worktree.fsmonitor),
394        }
395    }
396
397    pub fn inspect_change_monitor_with_options(
398        &self,
399        options: &WorktreeStatusOptions,
400    ) -> Result<crate::ChangeMonitorReport> {
401        let session = ChangeMonitorSession::prepare(self.root(), options.fsmonitor);
402        let report = session.report();
403        session.persist()?;
404        Ok(report)
405    }
406}
407
408#[derive(Default)]
409struct TreeBuildState {
410    entries: Vec<TreeEntry>,
411    profile: TreeBuildProfile,
412}
413
414struct TreeBuildPolicy<'a> {
415    repo: &'a Repository,
416    /// Walk root, used to compute paths relative to it so they line
417    /// up with manifest keys (`src/foo.rs`, not absolute paths).
418    walk_root: &'a Path,
419    /// Optional stat-cache. When present, files whose disk stat
420    /// `(inode, mtime, ctime, mode)` matches the recorded entry get
421    /// their hash reused — no `read + hash + put_blob` cycle. Tracked
422    /// in `stat_cache_hits` for diagnostics.
423    stat_cache: Option<&'a crate::thread_manifest::ThreadManifest>,
424    stat_cache_hits: u64,
425    /// Blobs encountered during the walk that aren't already in the
426    /// store. Drained once at the end of the walk into a single
427    /// packfile via `ObjectStore::put_blobs_packed` — turns N×fsync
428    /// per blob into 2×fsync total (the .pack + .idx).
429    pending_blobs: Vec<(ContentHash, Vec<u8>)>,
430    /// Hashes already queued in `pending_blobs` so we don't double-add
431    /// content-equal files (which is common: README.md, .gitkeep, etc).
432    seen: HashSet<ContentHash>,
433}
434
435impl<'a> TreeBuildPolicy<'a> {
436    fn new(
437        repo: &'a Repository,
438        walk_root: &'a Path,
439        stat_cache: Option<&'a crate::thread_manifest::ThreadManifest>,
440    ) -> Self {
441        Self {
442            repo,
443            walk_root,
444            stat_cache,
445            stat_cache_hits: 0,
446            pending_blobs: Vec::new(),
447            seen: HashSet::new(),
448        }
449    }
450
451    /// Look up `entry`'s manifest record by relative path and, if
452    /// found, compare the on-disk `(inode, mtime, ctime, mode)` to
453    /// the recorded snapshot. Returns the cached hash when the
454    /// match is exact; `None` otherwise. The caller falls back to
455    /// the read-and-hash path.
456    fn lookup_stat_cache_hash(&self, entry: &WalkEntry<'_>) -> Option<ContentHash> {
457        let cache = self.stat_cache?;
458        let rel = entry.path.strip_prefix(self.walk_root).ok()?;
459        // Manifest keys use forward-slash separators (cross-platform
460        // by construction; see `populate_manifest_from_tree`).
461        let mut rel_str = String::with_capacity(rel.as_os_str().len());
462        for (i, component) in rel.components().enumerate() {
463            let std::path::Component::Normal(s) = component else {
464                return None;
465            };
466            if i > 0 {
467                rel_str.push('/');
468            }
469            rel_str.push_str(s.to_str()?);
470        }
471        let cached = cache.files.get(&rel_str)?;
472        let (size, inode, mtime_ns, ctime_ns, mode) =
473            crate::stat_signature::stat_signature(entry.path, &entry.metadata);
474        let stat = crate::thread_manifest::ManifestFile {
475            hash: cached.hash,
476            size,
477            inode,
478            mtime_ns,
479            ctime_ns,
480            mode,
481        };
482        if stat.matches(cached) {
483            Some(cached.hash)
484        } else {
485            None
486        }
487    }
488
489    /// Push a blob into the pending pack if it's not already in the
490    /// store and not already queued. The hash is always the canonical
491    /// blob hash — caller passes a precomputed one to avoid hashing
492    /// twice.
493    fn enqueue_blob(&mut self, blob: Blob, hash: ContentHash) -> Result<()> {
494        if self.seen.contains(&hash) {
495            return Ok(());
496        }
497        if self.repo.store.has_blob(&hash)? {
498            self.seen.insert(hash);
499            return Ok(());
500        }
501        self.seen.insert(hash);
502        self.pending_blobs.push((hash, blob.into_content()));
503        Ok(())
504    }
505}
506
507impl WorktreeWalkPolicy for TreeBuildPolicy<'_> {
508    type DirectoryState = TreeBuildState;
509    type Output = TreeBuildOutput;
510
511    fn enter_directory(
512        &mut self,
513        _directory: &WalkDirectory<'_>,
514        _tree: Option<&Tree>,
515    ) -> Result<Self::DirectoryState> {
516        Ok(TreeBuildState::default())
517    }
518
519    fn visit_file(
520        &mut self,
521        entry: WalkEntry<'_>,
522        tree_entry: Option<&TreeEntry>,
523        state: &mut Self::DirectoryState,
524    ) -> Result<()> {
525        trace!(file = %entry.path.display(), size = entry.metadata.len(), "Processing file");
526
527        if let Some(target) = tree_entry.and_then(TreeEntry::gitlink_target) {
528            let read_start = Instant::now();
529            let (blob, hash) = read_blob_with_hash(entry.path, entry.metadata.len())?;
530            let read_elapsed = read_start.elapsed().as_millis();
531            if blob.content() == gitlink_placeholder_bytes(&target) {
532                state.profile.file_count += 1;
533                state.profile.blob_prep_ms += read_elapsed;
534                state
535                    .entries
536                    .push(TreeEntry::gitlink(entry.name.to_string(), target)?);
537                return Ok(());
538            }
539
540            let enqueue_start = Instant::now();
541            self.enqueue_blob(blob, hash)?;
542            let enqueue_elapsed = enqueue_start.elapsed().as_millis();
543            state.profile.file_count += 1;
544            state.profile.blob_prep_ms += read_elapsed;
545            state.profile.blob_write_ms += enqueue_elapsed;
546            state.entries.push(TreeEntry::file(
547                entry.name.to_string(),
548                hash,
549                entry.executable,
550            )?);
551            return Ok(());
552        }
553
554        // Stat-cache fast path: when this build is on behalf of a
555        // capture against a previously-materialised thread, reuse the
556        // recorded hash if the file's stat fields haven't shifted
557        // since materialise time. Skips the read+hash entirely for
558        // unchanged files — the dominant cost on a "one file edited
559        // in a big repo" capture.
560        if let Some(hash) = self.lookup_stat_cache_hash(&entry) {
561            self.stat_cache_hits += 1;
562            state.profile.file_count += 1;
563            state.entries.push(TreeEntry::file(
564                entry.name.to_string(),
565                hash,
566                entry.executable,
567            )?);
568            return Ok(());
569        }
570
571        let read_start = Instant::now();
572        let (blob, hash) = read_blob_with_hash(entry.path, entry.metadata.len())?;
573        let read_elapsed = read_start.elapsed().as_millis();
574        trace!(duration_ms = read_elapsed, "File read complete");
575
576        // Defer the actual write — we accumulate every new blob and
577        // install them as a single pack at the end of the walk
578        // (one fsync regardless of file count, vs. ~30ms per loose
579        // file on macOS). The tree entry only needs the hash.
580        let enqueue_start = Instant::now();
581        self.enqueue_blob(blob, hash)?;
582        let enqueue_elapsed = enqueue_start.elapsed().as_millis();
583
584        state.profile.file_count += 1;
585        state.profile.blob_prep_ms += read_elapsed;
586        state.profile.blob_write_ms += enqueue_elapsed;
587        state.entries.push(TreeEntry::file(
588            entry.name.to_string(),
589            hash,
590            entry.executable,
591        )?);
592        Ok(())
593    }
594
595    fn visit_symlink(
596        &mut self,
597        entry: WalkEntry<'_>,
598        _tree_entry: Option<&TreeEntry>,
599        state: &mut Self::DirectoryState,
600    ) -> Result<()> {
601        let target = fs::read_link(entry.path)?;
602        // Validate symlink escape against the *walk root*, not
603        // `repo.root()`. When `capture_thread_from_disk` builds a
604        // tree from a dedicated thread worktree, the walk root is
605        // the thread's checkout path (not the main repo) and
606        // symlinks should be allowed to point inside it. Pre-fix
607        // every symlink in such a worktree was rejected the moment
608        // the slow path ran, breaking `thread switch` auto-capture
609        // for any thread containing a symlink. For the common case
610        // where `build_tree(self.root)` runs against the main repo
611        // root, `walk_root == self.repo.root()` and behaviour is
612        // unchanged.
613        let symlink_dir = entry.path.parent().unwrap_or(self.walk_root);
614        if !validate_symlink_target(self.walk_root, symlink_dir, &target) {
615            return Err(HeddleError::InvalidSymlinkTarget(target));
616        }
617
618        let blob = Blob::new(objects::util::symlink_target_bytes(&target));
619        let hash = blob.hash();
620        let enqueue_start = Instant::now();
621        self.enqueue_blob(blob, hash)?;
622        state.profile.blob_write_ms += enqueue_start.elapsed().as_millis();
623        state
624            .entries
625            .push(TreeEntry::symlink(entry.name.to_string(), hash)?);
626        Ok(())
627    }
628
629    fn visit_directory_output(
630        &mut self,
631        entry: WalkEntry<'_>,
632        _tree_entry: Option<&TreeEntry>,
633        subtree: TreeBuildOutput,
634        state: &mut Self::DirectoryState,
635    ) -> Result<()> {
636        trace!(dir = %entry.path.display(), "Processing directory");
637        state.profile.blob_prep_ms += subtree.profile.blob_prep_ms;
638        state.profile.blob_write_ms += subtree.profile.blob_write_ms;
639        state.profile.tree_write_ms += subtree.profile.tree_write_ms;
640        state.profile.file_count += subtree.profile.file_count;
641        state.profile.dir_count += subtree.profile.dir_count + 1;
642        let store_start = Instant::now();
643        let hash = self.repo.store.put_tree(&subtree.tree)?;
644        state.profile.tree_write_ms += store_start.elapsed().as_millis();
645        state
646            .entries
647            .push(TreeEntry::directory(entry.name.to_string(), hash)?);
648        Ok(())
649    }
650
651    fn visit_missing(
652        &mut self,
653        _rel_path: &Path,
654        _tree_entry: &TreeEntry,
655        _state: &mut Self::DirectoryState,
656    ) -> Result<()> {
657        Ok(())
658    }
659
660    fn leave_directory(
661        &mut self,
662        directory: &WalkDirectory<'_>,
663        _tree: Option<&Tree>,
664        state: Self::DirectoryState,
665    ) -> Result<TreeBuildOutput> {
666        debug!(
667            dir = %self.repo.root().join(directory.rel_path).display(),
668            files = state.profile.file_count,
669            dirs = state.profile.dir_count,
670            "Directory processed"
671        );
672        Ok(TreeBuildOutput {
673            tree: Tree::from_entries(state.entries),
674            profile: state.profile,
675        })
676    }
677}
678
#[cfg(test)]
mod tests {
    use objects::object::ContentHash;
    use tempfile::TempDir;

    use crate::worktree_walk::{read_blob_with_hash, read_file_hash};

    /// Writes `abc` to `file.txt`, records that length, then rewrites
    /// the file as `abcdef`. Returns the temp dir (which must be kept
    /// alive), the file path and the now-stale length.
    fn file_grown_after_stat() -> (TempDir, std::path::PathBuf, u64) {
        let temp_dir = TempDir::new().unwrap();
        let path = temp_dir.path().join("file.txt");

        std::fs::write(&path, b"abc").unwrap();
        let stale_len = std::fs::metadata(&path).unwrap().len();
        std::fs::write(&path, b"abcdef").unwrap();

        (temp_dir, path, stale_len)
    }

    /// The blob and its hash must reflect the bytes actually read, not
    /// the stale size hint.
    #[test]
    fn read_blob_with_hash_uses_bytes_read_when_file_grows() {
        let (_temp_dir, path, stale_len) = file_grown_after_stat();

        let (blob, hash) = read_blob_with_hash(&path, stale_len).unwrap();

        assert_eq!(blob.content(), b"abcdef");
        assert_eq!(hash, blob.hash());
    }

    /// The hash-only reader must likewise hash the bytes actually read.
    #[test]
    fn read_file_hash_uses_bytes_read_when_file_grows() {
        let (_temp_dir, path, stale_len) = file_grown_after_stat();

        let hash = read_file_hash(&path, stale_len).unwrap();

        assert_eq!(hash, ContentHash::compute_typed("blob", b"abcdef"));
    }
}