Skip to main content

repo/
repository_tree.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Tree building and materialization helpers.
3
4use std::{collections::HashSet, fs, path::Path, time::Instant};
5
6use objects::{
7    object::{Blob, ContentHash, Tree, TreeEntry},
8    worktree::WorktreeStatus,
9};
10use tracing::{debug, instrument, trace, warn};
11
12use super::{
13    HeddleError, Repository, Result,
14    repository_worktree_status::{WorktreeStatusDetailed, compare_worktree_with_index_detailed},
15};
16use crate::{
17    FsMonitorSettings, WorktreeIndex, WorktreeStatusOptions,
18    fsmonitor::ChangeMonitorSession,
19    worktree_ignore::WorktreeIgnoreMatcher,
20    worktree_index::{WorktreeIndexLoadStats, WorktreeIndexSaveStats},
21    worktree_walk::{
22        WalkDirectory, WalkEntry, WorktreeWalkPolicy, read_blob_with_hash, validate_symlink_target,
23        walk_worktree,
24    },
25};
26
/// Timings and size counters gathered during one cached worktree comparison.
///
/// All `*_ms` fields hold elapsed milliseconds as returned by
/// `Duration::as_millis`. Values default to zero / `false`.
#[derive(Clone, Debug, Default)]
pub struct WorktreeCompareProfile {
    /// Wall-clock time spent in `WorktreeIndex::load_profiled`.
    pub index_load_ms: u128,
    /// `snapshot_load_ms` as reported by the index load stats.
    pub index_snapshot_load_ms: u128,
    /// `journal_replay_ms` as reported by the index load stats.
    pub index_journal_replay_ms: u128,
    /// `snapshot_bytes` as reported by the index load stats.
    pub index_snapshot_bytes: u64,
    /// `journal_bytes` as reported by the index load stats.
    pub index_journal_bytes: u64,
    /// `journal_ops` as reported by the index load stats.
    pub index_journal_ops: usize,
    /// Wall-clock time spent preparing the change-monitor session.
    pub monitor_prepare_ms: u128,
    /// Wall-clock time spent comparing the worktree with the index.
    pub compare_ms: u128,
    /// Wall-clock time of a successful index save; zero when the index was
    /// clean or the save failed.
    pub index_save_ms: u128,
    /// `snapshot_write_ms` as reported by the index save stats.
    pub index_snapshot_write_ms: u128,
    /// `journal_append_ms` as reported by the index save stats.
    pub index_journal_append_ms: u128,
    /// `snapshot_bytes` as reported by the index save stats.
    pub index_save_snapshot_bytes: u64,
    /// `journal_bytes` as reported by the index save stats.
    pub index_save_journal_bytes: u64,
    /// `journal_ops` as reported by the index save stats.
    pub index_save_journal_ops: usize,
    /// `compacted` flag as reported by the index save stats.
    pub index_save_compacted: bool,
    /// Wall-clock time spent persisting the change-monitor state.
    pub monitor_persist_ms: u128,
    /// Time spent flattening the detailed status into a flat one; stays zero
    /// when only the detailed comparison was requested.
    pub untracked_flatten_ms: u128,
    /// Flattened untracked path count reported by the detailed status; stays
    /// zero when only the detailed comparison was requested.
    pub untracked_flattened_paths: usize,
}
48
/// Timings and counters gathered while building a tree from a directory.
///
/// All `*_ms` fields hold elapsed milliseconds as returned by
/// `Duration::as_millis`.
#[derive(Clone, Debug, Default)]
pub struct TreeBuildProfile {
    /// Total elapsed time of the build as measured by `build_tree_profiled`.
    pub tree_walk_ms: u128,
    /// Accumulated time spent reading and hashing regular files.
    pub blob_prep_ms: u128,
    /// Accumulated time spent queueing blobs plus the final packed flush.
    pub blob_write_ms: u128,
    /// Accumulated time spent storing subtree objects.
    pub tree_write_ms: u128,
    /// Number of regular files visited (symlinks are not counted).
    pub file_count: usize,
    /// Number of subdirectories visited below the walk root.
    pub dir_count: usize,
}
58
59#[derive(Debug, Clone)]
60struct TreeBuildOutput {
61    tree: Tree,
62    profile: TreeBuildProfile,
63}
64
65impl Repository {
66    /// Build a tree from a directory.
67    #[instrument(skip(self), fields(dir = %dir.display()))]
68    pub fn build_tree(&self, dir: &Path) -> Result<Tree> {
69        self.build_tree_profiled(dir).map(|(tree, _)| tree)
70    }
71
72    #[instrument(skip(self), fields(dir = %dir.display()))]
73    pub fn build_tree_profiled(&self, dir: &Path) -> Result<(Tree, TreeBuildProfile)> {
74        let patterns = self.ignore_patterns()?;
75        debug!(pattern_count = patterns.len(), "Starting tree build");
76        let start = Instant::now();
77        let nested_exclusions = self.nested_thread_worktree_exclusions(dir)?;
78        let tree = self.build_tree_walk(dir, &patterns, nested_exclusions);
79        let elapsed = start.elapsed().as_millis();
80        debug!(duration_ms = elapsed, "Tree build complete");
81        tree.map(|output| {
82            let mut profile = output.profile;
83            profile.tree_walk_ms = elapsed;
84            (output.tree, profile)
85        })
86    }
87
88    #[instrument(skip(self, patterns, nested_exclusions), fields(dir = %dir.display()))]
89    fn build_tree_walk(
90        &self,
91        dir: &Path,
92        patterns: &[String],
93        nested_exclusions: Vec<std::path::PathBuf>,
94    ) -> Result<TreeBuildOutput> {
95        let ignore_matcher =
96            WorktreeIgnoreMatcher::new(patterns).with_nested_worktree_exclusions(nested_exclusions);
97        let mut policy = TreeBuildPolicy::new(self);
98        let mut output = walk_worktree(self, dir, &ignore_matcher, None, &mut policy)?;
99
100        // Flush every newly-seen blob as a single packfile. Stores
101        // that don't override `put_blobs_packed` fall back to per-blob
102        // writes (correct, just slower). Time is folded into
103        // `blob_write_ms` so the existing perf profile keeps tracking
104        // total blob-storage cost.
105        if !policy.pending_blobs.is_empty() {
106            let flush_start = Instant::now();
107            let pending = std::mem::take(&mut policy.pending_blobs);
108            self.store.put_blobs_packed(pending)?;
109            output.profile.blob_write_ms += flush_start.elapsed().as_millis();
110        }
111
112        Ok(output)
113    }
114
115    /// Compare the worktree against a tree using the persisted binary index.
116    pub fn compare_worktree_cached(&self, tree: &Tree) -> Result<WorktreeStatus> {
117        self.compare_worktree_cached_with_options(tree, &self.default_worktree_status_options())
118    }
119
120    pub fn compare_worktree_cached_detailed(&self, tree: &Tree) -> Result<WorktreeStatusDetailed> {
121        self.compare_worktree_cached_detailed_with_options(
122            tree,
123            &self.default_worktree_status_options(),
124        )
125    }
126
127    /// Compare the worktree against a tree using the persisted binary index.
128    pub fn compare_worktree_cached_with_options(
129        &self,
130        tree: &Tree,
131        options: &WorktreeStatusOptions,
132    ) -> Result<WorktreeStatus> {
133        self.compare_worktree_cached_profiled_with_options(tree, options)
134            .map(|(status, _)| status)
135    }
136
137    pub fn compare_worktree_cached_detailed_with_options(
138        &self,
139        tree: &Tree,
140        options: &WorktreeStatusOptions,
141    ) -> Result<WorktreeStatusDetailed> {
142        self.compare_worktree_cached_detailed_profiled_with_options(tree, options)
143            .map(|(status, _)| status)
144    }
145
146    pub fn compare_worktree_cached_profiled_with_options(
147        &self,
148        tree: &Tree,
149        options: &WorktreeStatusOptions,
150    ) -> Result<(WorktreeStatus, WorktreeCompareProfile)> {
151        let (detailed_status, mut profile) =
152            self.compare_worktree_cached_detailed_profiled_with_options(tree, options)?;
153        let flatten_start = Instant::now();
154        let flattened_paths = detailed_status.untracked.flattened_path_count();
155        let mut status = detailed_status.into_flat_status();
156        profile.untracked_flatten_ms = flatten_start.elapsed().as_millis();
157        profile.untracked_flattened_paths = flattened_paths;
158        status.modified.sort();
159        status.added.sort();
160        status.deleted.sort();
161        Ok((status, profile))
162    }
163
164    pub fn compare_worktree_cached_detailed_profiled_with_options(
165        &self,
166        tree: &Tree,
167        options: &WorktreeStatusOptions,
168    ) -> Result<(WorktreeStatusDetailed, WorktreeCompareProfile)> {
169        let index_path = self.worktree_index_path();
170        let load_start = Instant::now();
171        let (mut index, load_stats) = match WorktreeIndex::load_profiled(&index_path) {
172            Ok(result) => result,
173            Err(error) => {
174                warn!(path = %index_path.display(), %error, "Ignoring unreadable worktree index");
175                (WorktreeIndex::new(), WorktreeIndexLoadStats::default())
176            }
177        };
178        let index_load_ms = load_start.elapsed().as_millis();
179
180        let monitor_prepare_start = Instant::now();
181        let monitor = ChangeMonitorSession::prepare(self.root(), options.fsmonitor);
182        let monitor_prepare_ms = monitor_prepare_start.elapsed().as_millis();
183
184        let patterns = self.ignore_patterns()?;
185        let nested_exclusions = self.nested_thread_worktree_exclusions(self.root())?;
186        let ignore_matcher = WorktreeIgnoreMatcher::new(&patterns)
187            .with_nested_worktree_exclusions(nested_exclusions);
188        let compare_start = Instant::now();
189        let (status, stats) = compare_worktree_with_index_detailed(
190            self,
191            tree,
192            &ignore_matcher,
193            &mut index,
194            &monitor,
195        )?;
196        let compare_ms = compare_start.elapsed().as_millis();
197
198        let save_start = Instant::now();
199        let (index_save_ms, save_stats) = if index.is_dirty() {
200            match index.save_profiled(&index_path) {
201                Ok(stats) => {
202                    index.mark_clean();
203                    (save_start.elapsed().as_millis(), stats)
204                }
205                Err(error) => {
206                    warn!(path = %index_path.display(), %error, "Failed to persist worktree index");
207                    (0, WorktreeIndexSaveStats::default())
208                }
209            }
210        } else {
211            (0, WorktreeIndexSaveStats::default())
212        };
213
214        let persist_start = Instant::now();
215        if let Err(error) = monitor.persist() {
216            warn!(path = %self.root().display(), %error, "Failed to persist monitor state");
217        }
218        let monitor_persist_ms = persist_start.elapsed().as_millis();
219
220        debug!(
221            index_load_ms,
222            index_snapshot_load_ms = load_stats.snapshot_load_ms,
223            index_journal_replay_ms = load_stats.journal_replay_ms,
224            index_snapshot_bytes = load_stats.snapshot_bytes,
225            index_journal_bytes = load_stats.journal_bytes,
226            index_journal_ops = load_stats.journal_ops,
227            monitor_prepare_ms,
228            compare_ms,
229            index_save_ms,
230            index_snapshot_write_ms = save_stats.snapshot_write_ms,
231            index_journal_append_ms = save_stats.journal_append_ms,
232            index_save_snapshot_bytes = save_stats.snapshot_bytes,
233            index_save_journal_bytes = save_stats.journal_bytes,
234            index_save_journal_ops = save_stats.journal_ops,
235            index_save_compacted = save_stats.compacted,
236            index_save_compact_reason = save_stats.compact_reason.unwrap_or("none"),
237            monitor_persist_ms,
238            tracked_refresh_ms = stats.tracked_refresh_ms,
239            untracked_scan_ms = stats.untracked_scan_ms,
240            untracked_flatten_ms = 0,
241            untracked_flattened_paths = 0,
242            hashing_ms = stats.hashing_ms,
243            directory_cache_compare_ms = stats.directory_cache_compare_ms,
244            directories_scanned = stats.directories_scanned,
245            directories_skipped = stats.directories_skipped,
246            files_hashed = stats.files_hashed,
247            cache_hits = stats.cache_hits,
248            monitor_backend = monitor.backend.unwrap_or("off"),
249            monitor_status = ?monitor.status,
250            monitor_reason = monitor.reason.as_deref().unwrap_or("ready"),
251            monitor_changed_paths = stats.monitor_changed_paths,
252            monitor_skipped_directories = stats.monitor_skipped_directories,
253            "Worktree compare complete"
254        );
255
256        Ok((
257            status,
258            WorktreeCompareProfile {
259                index_load_ms,
260                index_snapshot_load_ms: load_stats.snapshot_load_ms,
261                index_journal_replay_ms: load_stats.journal_replay_ms,
262                index_snapshot_bytes: load_stats.snapshot_bytes,
263                index_journal_bytes: load_stats.journal_bytes,
264                index_journal_ops: load_stats.journal_ops,
265                monitor_prepare_ms,
266                compare_ms,
267                index_save_ms,
268                index_snapshot_write_ms: save_stats.snapshot_write_ms,
269                index_journal_append_ms: save_stats.journal_append_ms,
270                index_save_snapshot_bytes: save_stats.snapshot_bytes,
271                index_save_journal_bytes: save_stats.journal_bytes,
272                index_save_journal_ops: save_stats.journal_ops,
273                index_save_compacted: save_stats.compacted,
274                monitor_persist_ms,
275                untracked_flatten_ms: 0,
276                untracked_flattened_paths: 0,
277            },
278        ))
279    }
280
281    /// Return whether the worktree matches the provided tree.
282    pub fn worktree_is_clean_cached(&self, tree: &Tree) -> Result<bool> {
283        self.worktree_is_clean_cached_with_options(tree, &self.default_worktree_status_options())
284    }
285
286    /// Return whether the worktree matches the provided tree.
287    pub fn worktree_is_clean_cached_with_options(
288        &self,
289        tree: &Tree,
290        options: &WorktreeStatusOptions,
291    ) -> Result<bool> {
292        Ok(self
293            .compare_worktree_cached_detailed_with_options(tree, options)?
294            .is_clean())
295    }
296
297    fn worktree_index_path(&self) -> std::path::PathBuf {
298        self.root.join(".heddle/state").join("index.bin")
299    }
300
301    fn default_worktree_status_options(&self) -> WorktreeStatusOptions {
302        WorktreeStatusOptions {
303            fsmonitor: FsMonitorSettings::from(self.config.worktree.fsmonitor),
304        }
305    }
306
307    pub fn inspect_change_monitor_with_options(
308        &self,
309        options: &WorktreeStatusOptions,
310    ) -> Result<crate::ChangeMonitorReport> {
311        let session = ChangeMonitorSession::prepare(self.root(), options.fsmonitor);
312        let report = session.report();
313        session.persist()?;
314        Ok(report)
315    }
316}
317
318#[derive(Default)]
319struct TreeBuildState {
320    entries: Vec<TreeEntry>,
321    profile: TreeBuildProfile,
322}
323
324struct TreeBuildPolicy<'a> {
325    repo: &'a Repository,
326    /// Blobs encountered during the walk that aren't already in the
327    /// store. Drained once at the end of the walk into a single
328    /// packfile via `ObjectStore::put_blobs_packed` — turns N×fsync
329    /// per blob into 2×fsync total (the .pack + .idx).
330    pending_blobs: Vec<(ContentHash, Vec<u8>)>,
331    /// Hashes already queued in `pending_blobs` so we don't double-add
332    /// content-equal files (which is common: README.md, .gitkeep, etc).
333    seen: HashSet<ContentHash>,
334}
335
336impl<'a> TreeBuildPolicy<'a> {
337    fn new(repo: &'a Repository) -> Self {
338        Self {
339            repo,
340            pending_blobs: Vec::new(),
341            seen: HashSet::new(),
342        }
343    }
344
345    /// Push a blob into the pending pack if it's not already in the
346    /// store and not already queued. The hash is always the canonical
347    /// blob hash — caller passes a precomputed one to avoid hashing
348    /// twice.
349    fn enqueue_blob(&mut self, blob: Blob, hash: ContentHash) -> Result<()> {
350        if self.seen.contains(&hash) {
351            return Ok(());
352        }
353        if self.repo.store.has_blob(&hash)? {
354            self.seen.insert(hash);
355            return Ok(());
356        }
357        self.seen.insert(hash);
358        self.pending_blobs.push((hash, blob.into_content()));
359        Ok(())
360    }
361}
362
363impl WorktreeWalkPolicy for TreeBuildPolicy<'_> {
364    type DirectoryState = TreeBuildState;
365    type Output = TreeBuildOutput;
366
367    fn enter_directory(
368        &mut self,
369        _directory: &WalkDirectory<'_>,
370        _tree: Option<&Tree>,
371    ) -> Result<Self::DirectoryState> {
372        Ok(TreeBuildState::default())
373    }
374
375    fn visit_file(
376        &mut self,
377        entry: WalkEntry<'_>,
378        _tree_entry: Option<&TreeEntry>,
379        state: &mut Self::DirectoryState,
380    ) -> Result<()> {
381        trace!(file = %entry.path.display(), size = entry.metadata.len(), "Processing file");
382        let read_start = Instant::now();
383        let (blob, hash) = read_blob_with_hash(entry.path, entry.metadata.len())?;
384        let read_elapsed = read_start.elapsed().as_millis();
385        trace!(duration_ms = read_elapsed, "File read complete");
386
387        // Defer the actual write — we accumulate every new blob and
388        // install them as a single pack at the end of the walk
389        // (one fsync regardless of file count, vs. ~30ms per loose
390        // file on macOS). The tree entry only needs the hash.
391        let enqueue_start = Instant::now();
392        self.enqueue_blob(blob, hash)?;
393        let enqueue_elapsed = enqueue_start.elapsed().as_millis();
394
395        state.profile.file_count += 1;
396        state.profile.blob_prep_ms += read_elapsed;
397        state.profile.blob_write_ms += enqueue_elapsed;
398        state.entries.push(TreeEntry::file(
399            entry.name.to_string(),
400            hash,
401            entry.executable,
402        )?);
403        Ok(())
404    }
405
406    fn visit_symlink(
407        &mut self,
408        entry: WalkEntry<'_>,
409        _tree_entry: Option<&TreeEntry>,
410        state: &mut Self::DirectoryState,
411    ) -> Result<()> {
412        let target = fs::read_link(entry.path)?;
413        let symlink_dir = entry.path.parent().unwrap_or(self.repo.root());
414        if !validate_symlink_target(self.repo.root(), symlink_dir, &target) {
415            return Err(HeddleError::InvalidSymlinkTarget(target));
416        }
417
418        let blob = Blob::new(target.to_string_lossy().as_bytes().to_vec());
419        let hash = blob.hash();
420        let enqueue_start = Instant::now();
421        self.enqueue_blob(blob, hash)?;
422        state.profile.blob_write_ms += enqueue_start.elapsed().as_millis();
423        state
424            .entries
425            .push(TreeEntry::symlink(entry.name.to_string(), hash)?);
426        Ok(())
427    }
428
429    fn visit_directory_output(
430        &mut self,
431        entry: WalkEntry<'_>,
432        _tree_entry: Option<&TreeEntry>,
433        subtree: TreeBuildOutput,
434        state: &mut Self::DirectoryState,
435    ) -> Result<()> {
436        trace!(dir = %entry.path.display(), "Processing directory");
437        state.profile.blob_prep_ms += subtree.profile.blob_prep_ms;
438        state.profile.blob_write_ms += subtree.profile.blob_write_ms;
439        state.profile.tree_write_ms += subtree.profile.tree_write_ms;
440        state.profile.file_count += subtree.profile.file_count;
441        state.profile.dir_count += subtree.profile.dir_count + 1;
442        let store_start = Instant::now();
443        let hash = self.repo.store.put_tree(&subtree.tree)?;
444        state.profile.tree_write_ms += store_start.elapsed().as_millis();
445        state
446            .entries
447            .push(TreeEntry::directory(entry.name.to_string(), hash)?);
448        Ok(())
449    }
450
451    fn visit_missing(
452        &mut self,
453        _rel_path: &Path,
454        _tree_entry: &TreeEntry,
455        _state: &mut Self::DirectoryState,
456    ) -> Result<()> {
457        Ok(())
458    }
459
460    fn leave_directory(
461        &mut self,
462        directory: &WalkDirectory<'_>,
463        _tree: Option<&Tree>,
464        state: Self::DirectoryState,
465    ) -> Result<TreeBuildOutput> {
466        debug!(
467            dir = %self.repo.root().join(directory.rel_path).display(),
468            files = state.profile.file_count,
469            dirs = state.profile.dir_count,
470            "Directory processed"
471        );
472        Ok(TreeBuildOutput {
473            tree: Tree::from_entries(state.entries),
474            profile: state.profile,
475        })
476    }
477}
478
#[cfg(test)]
mod tests {
    use objects::object::ContentHash;
    use tempfile::TempDir;

    use crate::worktree_walk::{read_blob_with_hash, read_file_hash};

    /// Writes `file.txt` containing `abc`, records that size, then rewrites
    /// the file as `abcdef`. The returned size is therefore stale. The
    /// `TempDir` is returned so the directory outlives the test body.
    fn file_grown_after_stat() -> (TempDir, std::path::PathBuf, u64) {
        let temp_dir = TempDir::new().unwrap();
        let path = temp_dir.path().join("file.txt");

        std::fs::write(&path, b"abc").unwrap();
        let stale_size = std::fs::metadata(&path).unwrap().len();
        std::fs::write(&path, b"abcdef").unwrap();

        (temp_dir, path, stale_size)
    }

    #[test]
    fn read_blob_with_hash_uses_bytes_read_when_file_grows() {
        let (_temp_dir, path, stale_size) = file_grown_after_stat();

        let (blob, hash) = read_blob_with_hash(&path, stale_size).unwrap();

        assert_eq!(blob.content(), b"abcdef");
        assert_eq!(hash, blob.hash());
    }

    #[test]
    fn read_file_hash_uses_bytes_read_when_file_grows() {
        let (_temp_dir, path, stale_size) = file_grown_after_stat();

        let hash = read_file_hash(&path, stale_size).unwrap();

        assert_eq!(hash, ContentHash::compute_typed("blob", b"abcdef"));
    }
}
514}