1use std::{collections::HashSet, fs, path::Path, time::Instant};
5
6use objects::{
7 object::{Blob, ContentHash, Tree, TreeEntry},
8 store::ObjectStore,
9 util::gitlink_placeholder_bytes,
10 worktree::WorktreeStatus,
11};
12use tracing::{debug, instrument, trace, warn};
13
14use super::{
15 HeddleError, Repository, Result,
16 repository_worktree_status::{WorktreeStatusDetailed, compare_worktree_with_index_detailed},
17};
18use crate::{
19 FsMonitorSettings, WorktreeIndex, WorktreeStatusOptions,
20 fsmonitor::ChangeMonitorSession,
21 worktree_ignore::WorktreeIgnoreMatcher,
22 worktree_index::{WorktreeIndexLoadStats, WorktreeIndexSaveStats},
23 worktree_walk::{
24 WalkDirectory, WalkEntry, WorktreeWalkPolicy, read_blob_with_hash, validate_symlink_target,
25 walk_worktree,
26 },
27};
28
/// Timing and counter breakdown for one cached worktree comparison.
///
/// Fields ending in `_ms` are durations in milliseconds; the remaining fields are byte,
/// operation, path, directory, or file counts. All fields default to zero (`false` for the
/// single boolean).
#[derive(Debug, Clone, Default)]
pub struct WorktreeCompareProfile {
    /// Wall-clock time spent loading the worktree index.
    pub index_load_ms: u128,
    /// Snapshot-load time as reported by the index load statistics.
    pub index_snapshot_load_ms: u128,
    /// Journal-replay time as reported by the index load statistics.
    pub index_journal_replay_ms: u128,
    /// Snapshot size in bytes as reported by the index load statistics.
    pub index_snapshot_bytes: u64,
    /// Journal size in bytes as reported by the index load statistics.
    pub index_journal_bytes: u64,
    /// Number of journal operations as reported by the index load statistics.
    pub index_journal_ops: usize,
    /// Wall-clock time spent preparing the change-monitor session.
    pub monitor_prepare_ms: u128,
    /// Wall-clock time spent in the index-backed comparison itself.
    pub compare_ms: u128,
    /// Wall-clock time spent persisting the index; `0` when nothing was saved or saving failed.
    pub index_save_ms: u128,
    /// Snapshot-write time as reported by the index save statistics.
    pub index_snapshot_write_ms: u128,
    /// Journal-append time as reported by the index save statistics.
    pub index_journal_append_ms: u128,
    /// Snapshot bytes as reported by the index save statistics.
    pub index_save_snapshot_bytes: u64,
    /// Journal bytes as reported by the index save statistics.
    pub index_save_journal_bytes: u64,
    /// Journal operations as reported by the index save statistics.
    pub index_save_journal_ops: usize,
    /// Whether the index save statistics reported a compaction.
    pub index_save_compacted: bool,
    /// Wall-clock time spent persisting the change-monitor state.
    pub monitor_persist_ms: u128,
    /// Time spent converting the detailed status into a flat status; `0` for detailed results.
    pub untracked_flatten_ms: u128,
    /// Flattened untracked path count; `0` for detailed results.
    pub untracked_flattened_paths: usize,
    /// Tracked-refresh time as reported by the comparison statistics.
    pub tracked_refresh_ms: u128,
    /// Untracked-scan time as reported by the comparison statistics.
    pub untracked_scan_ms: u128,
    /// Hashing time as reported by the comparison statistics.
    pub hashing_ms: u128,
    /// Directory-cache comparison time as reported by the comparison statistics.
    pub directory_cache_compare_ms: u128,
    /// Directories scanned, as reported by the comparison statistics.
    pub directories_scanned: u64,
    /// Directories skipped, as reported by the comparison statistics.
    pub directories_skipped: u64,
    /// Files hashed, as reported by the comparison statistics.
    pub files_hashed: u64,
    /// Cache hits, as reported by the comparison statistics.
    pub cache_hits: u64,
    /// Changed paths reported by the monitor, per the comparison statistics.
    pub monitor_changed_paths: u64,
    /// Directories skipped thanks to the monitor, per the comparison statistics.
    pub monitor_skipped_directories: u64,
}
60
/// Timing and counter breakdown for one tree build.
///
/// Durations are in milliseconds; every field defaults to zero.
#[derive(Debug, Clone, Default)]
pub struct TreeBuildProfile {
    /// Wall-clock time of the whole walk, stamped once the build finishes.
    pub tree_walk_ms: u128,
    /// Accumulated time spent reading and hashing file contents.
    pub blob_prep_ms: u128,
    /// Accumulated time spent queueing blobs plus the final packed blob write.
    pub blob_write_ms: u128,
    /// Accumulated time spent writing subtree objects to the store.
    pub tree_write_ms: u128,
    /// Number of files recorded in the tree.
    pub file_count: usize,
    /// Number of subdirectories recorded in the tree.
    pub dir_count: usize,
}
70
71#[derive(Debug, Clone)]
72struct TreeBuildOutput {
73 tree: Tree,
74 profile: TreeBuildProfile,
75}
76
77impl Repository {
78 #[instrument(skip(self), fields(dir = %dir.display()))]
80 pub fn build_tree(&self, dir: &Path) -> Result<Tree> {
81 self.build_tree_profiled(dir).map(|(tree, _)| tree)
82 }
83
84 pub fn build_tree_with_stat_cache(
99 &self,
100 dir: &Path,
101 manifest: &crate::thread_manifest::ThreadManifest,
102 ) -> Result<Tree> {
103 self.build_tree_profiled_inner(dir, None, Some(manifest))
104 .map(|(tree, _)| tree)
105 }
106
107 #[instrument(skip(self), fields(dir = %dir.display()))]
108 pub fn build_tree_profiled(&self, dir: &Path) -> Result<(Tree, TreeBuildProfile)> {
109 self.build_tree_profiled_inner(dir, None, None)
110 }
111
112 pub(crate) fn build_tree_profiled_against(
113 &self,
114 dir: &Path,
115 baseline_tree: Option<&Tree>,
116 ) -> Result<(Tree, TreeBuildProfile)> {
117 self.build_tree_profiled_inner(dir, baseline_tree, None)
118 }
119
120 #[instrument(skip(self, manifest), fields(dir = %dir.display()))]
128 pub fn build_tree_profiled_with_stat_cache(
129 &self,
130 dir: &Path,
131 manifest: &crate::thread_manifest::ThreadManifest,
132 ) -> Result<(Tree, TreeBuildProfile)> {
133 self.build_tree_profiled_inner(dir, None, Some(manifest))
134 }
135
136 pub(crate) fn build_tree_profiled_with_stat_cache_against(
137 &self,
138 dir: &Path,
139 baseline_tree: Option<&Tree>,
140 manifest: &crate::thread_manifest::ThreadManifest,
141 ) -> Result<(Tree, TreeBuildProfile)> {
142 self.build_tree_profiled_inner(dir, baseline_tree, Some(manifest))
143 }
144
145 fn build_tree_profiled_inner(
146 &self,
147 dir: &Path,
148 baseline_tree: Option<&Tree>,
149 stat_cache: Option<&crate::thread_manifest::ThreadManifest>,
150 ) -> Result<(Tree, TreeBuildProfile)> {
151 let patterns = self.ignore_patterns()?;
152 debug!(pattern_count = patterns.len(), "Starting tree build");
153 let start = Instant::now();
154 let nested_exclusions = self.nested_thread_worktree_exclusions(dir)?;
155 let tree =
156 self.build_tree_walk(dir, &patterns, nested_exclusions, baseline_tree, stat_cache);
157 let elapsed = start.elapsed().as_millis();
158 debug!(duration_ms = elapsed, "Tree build complete");
159 tree.map(|output| {
160 let mut profile = output.profile;
161 profile.tree_walk_ms = elapsed;
162 (output.tree, profile)
163 })
164 }
165
166 #[instrument(skip(self, patterns, nested_exclusions, baseline_tree, stat_cache), fields(dir = %dir.display()))]
167 fn build_tree_walk(
168 &self,
169 dir: &Path,
170 patterns: &[String],
171 nested_exclusions: Vec<std::path::PathBuf>,
172 baseline_tree: Option<&Tree>,
173 stat_cache: Option<&crate::thread_manifest::ThreadManifest>,
174 ) -> Result<TreeBuildOutput> {
175 let ignore_matcher =
176 WorktreeIgnoreMatcher::new(patterns).with_nested_worktree_exclusions(nested_exclusions);
177 let mut policy = TreeBuildPolicy::new(self, dir, stat_cache);
178 let mut output = walk_worktree(self, dir, &ignore_matcher, baseline_tree, &mut policy)?;
179
180 if !policy.pending_blobs.is_empty() {
186 let flush_start = Instant::now();
187 let pending = std::mem::take(&mut policy.pending_blobs);
188 self.store.put_blobs_packed(pending)?;
189 output.profile.blob_write_ms += flush_start.elapsed().as_millis();
190 }
191
192 Ok(output)
193 }
194
195 pub fn compare_worktree_cached(&self, tree: &Tree) -> Result<WorktreeStatus> {
197 self.compare_worktree_cached_with_options(tree, &self.default_worktree_status_options())
198 }
199
200 pub fn compare_worktree_cached_detailed(&self, tree: &Tree) -> Result<WorktreeStatusDetailed> {
201 self.compare_worktree_cached_detailed_with_options(
202 tree,
203 &self.default_worktree_status_options(),
204 )
205 }
206
207 pub fn compare_worktree_cached_with_options(
209 &self,
210 tree: &Tree,
211 options: &WorktreeStatusOptions,
212 ) -> Result<WorktreeStatus> {
213 self.compare_worktree_cached_profiled_with_options(tree, options)
214 .map(|(status, _)| status)
215 }
216
217 pub fn compare_worktree_cached_detailed_with_options(
218 &self,
219 tree: &Tree,
220 options: &WorktreeStatusOptions,
221 ) -> Result<WorktreeStatusDetailed> {
222 self.compare_worktree_cached_detailed_profiled_with_options(tree, options)
223 .map(|(status, _)| status)
224 }
225
226 pub fn compare_worktree_cached_profiled_with_options(
227 &self,
228 tree: &Tree,
229 options: &WorktreeStatusOptions,
230 ) -> Result<(WorktreeStatus, WorktreeCompareProfile)> {
231 let (detailed_status, mut profile) =
232 self.compare_worktree_cached_detailed_profiled_with_options(tree, options)?;
233 let flatten_start = Instant::now();
234 let flattened_paths = detailed_status.untracked.flattened_path_count();
235 let mut status = detailed_status.into_flat_status();
236 profile.untracked_flatten_ms = flatten_start.elapsed().as_millis();
237 profile.untracked_flattened_paths = flattened_paths;
238 status.modified.sort();
239 status.added.sort();
240 status.deleted.sort();
241 Ok((status, profile))
242 }
243
244 pub fn compare_worktree_cached_detailed_profiled_with_options(
245 &self,
246 tree: &Tree,
247 options: &WorktreeStatusOptions,
248 ) -> Result<(WorktreeStatusDetailed, WorktreeCompareProfile)> {
249 let index_path = self.worktree_index_path();
250 let load_start = Instant::now();
251 let (mut index, load_stats) = match WorktreeIndex::load_profiled(&index_path) {
252 Ok(result) => result,
253 Err(error) => {
254 warn!(path = %index_path.display(), %error, "Ignoring unreadable worktree index");
255 (WorktreeIndex::new(), WorktreeIndexLoadStats::default())
256 }
257 };
258 let index_load_ms = load_start.elapsed().as_millis();
259
260 let monitor_prepare_start = Instant::now();
261 let monitor = ChangeMonitorSession::prepare(self.root(), options.fsmonitor);
262 let monitor_prepare_ms = monitor_prepare_start.elapsed().as_millis();
263
264 let patterns = self.ignore_patterns()?;
265 let nested_exclusions = self.nested_thread_worktree_exclusions(self.root())?;
266 let ignore_matcher = WorktreeIgnoreMatcher::new(&patterns)
267 .with_nested_worktree_exclusions(nested_exclusions);
268 let compare_start = Instant::now();
269 let (status, stats) = compare_worktree_with_index_detailed(
270 self,
271 tree,
272 &ignore_matcher,
273 &mut index,
274 &monitor,
275 )?;
276 let compare_ms = compare_start.elapsed().as_millis();
277
278 let save_start = Instant::now();
279 let (index_save_ms, save_stats) = if index.is_dirty() {
280 match index.save_profiled(&index_path) {
281 Ok(stats) => {
282 index.mark_clean();
283 (save_start.elapsed().as_millis(), stats)
284 }
285 Err(error) => {
286 warn!(path = %index_path.display(), %error, "Failed to persist worktree index");
287 (0, WorktreeIndexSaveStats::default())
288 }
289 }
290 } else {
291 (0, WorktreeIndexSaveStats::default())
292 };
293
294 let persist_start = Instant::now();
295 if let Err(error) = monitor.persist() {
296 warn!(path = %self.root().display(), %error, "Failed to persist monitor state");
297 }
298 let monitor_persist_ms = persist_start.elapsed().as_millis();
299
300 debug!(
301 index_load_ms,
302 index_snapshot_load_ms = load_stats.snapshot_load_ms,
303 index_journal_replay_ms = load_stats.journal_replay_ms,
304 index_snapshot_bytes = load_stats.snapshot_bytes,
305 index_journal_bytes = load_stats.journal_bytes,
306 index_journal_ops = load_stats.journal_ops,
307 monitor_prepare_ms,
308 compare_ms,
309 index_save_ms,
310 index_snapshot_write_ms = save_stats.snapshot_write_ms,
311 index_journal_append_ms = save_stats.journal_append_ms,
312 index_save_snapshot_bytes = save_stats.snapshot_bytes,
313 index_save_journal_bytes = save_stats.journal_bytes,
314 index_save_journal_ops = save_stats.journal_ops,
315 index_save_compacted = save_stats.compacted,
316 index_save_compact_reason = save_stats.compact_reason.unwrap_or("none"),
317 monitor_persist_ms,
318 tracked_refresh_ms = stats.tracked_refresh_ms,
319 untracked_scan_ms = stats.untracked_scan_ms,
320 untracked_flatten_ms = 0,
321 untracked_flattened_paths = 0,
322 hashing_ms = stats.hashing_ms,
323 directory_cache_compare_ms = stats.directory_cache_compare_ms,
324 directories_scanned = stats.directories_scanned,
325 directories_skipped = stats.directories_skipped,
326 files_hashed = stats.files_hashed,
327 cache_hits = stats.cache_hits,
328 monitor_backend = monitor.backend.unwrap_or("off"),
329 monitor_status = ?monitor.status,
330 monitor_reason = monitor.reason.as_deref().unwrap_or("ready"),
331 monitor_changed_paths = stats.monitor_changed_paths,
332 monitor_skipped_directories = stats.monitor_skipped_directories,
333 "Worktree compare complete"
334 );
335
336 Ok((
337 status,
338 WorktreeCompareProfile {
339 index_load_ms,
340 index_snapshot_load_ms: load_stats.snapshot_load_ms,
341 index_journal_replay_ms: load_stats.journal_replay_ms,
342 index_snapshot_bytes: load_stats.snapshot_bytes,
343 index_journal_bytes: load_stats.journal_bytes,
344 index_journal_ops: load_stats.journal_ops,
345 monitor_prepare_ms,
346 compare_ms,
347 index_save_ms,
348 index_snapshot_write_ms: save_stats.snapshot_write_ms,
349 index_journal_append_ms: save_stats.journal_append_ms,
350 index_save_snapshot_bytes: save_stats.snapshot_bytes,
351 index_save_journal_bytes: save_stats.journal_bytes,
352 index_save_journal_ops: save_stats.journal_ops,
353 index_save_compacted: save_stats.compacted,
354 monitor_persist_ms,
355 untracked_flatten_ms: 0,
356 untracked_flattened_paths: 0,
357 tracked_refresh_ms: stats.tracked_refresh_ms,
358 untracked_scan_ms: stats.untracked_scan_ms,
359 hashing_ms: stats.hashing_ms,
360 directory_cache_compare_ms: stats.directory_cache_compare_ms,
361 directories_scanned: stats.directories_scanned,
362 directories_skipped: stats.directories_skipped,
363 files_hashed: stats.files_hashed,
364 cache_hits: stats.cache_hits,
365 monitor_changed_paths: stats.monitor_changed_paths,
366 monitor_skipped_directories: stats.monitor_skipped_directories,
367 },
368 ))
369 }
370
371 pub fn worktree_is_clean_cached(&self, tree: &Tree) -> Result<bool> {
373 self.worktree_is_clean_cached_with_options(tree, &self.default_worktree_status_options())
374 }
375
376 pub fn worktree_is_clean_cached_with_options(
378 &self,
379 tree: &Tree,
380 options: &WorktreeStatusOptions,
381 ) -> Result<bool> {
382 Ok(self
383 .compare_worktree_cached_detailed_with_options(tree, options)?
384 .is_clean())
385 }
386
387 fn worktree_index_path(&self) -> std::path::PathBuf {
388 self.root.join(".heddle/state").join("index.bin")
389 }
390
391 fn default_worktree_status_options(&self) -> WorktreeStatusOptions {
392 WorktreeStatusOptions {
393 fsmonitor: FsMonitorSettings::from(self.config.worktree.fsmonitor),
394 }
395 }
396
397 pub fn inspect_change_monitor_with_options(
398 &self,
399 options: &WorktreeStatusOptions,
400 ) -> Result<crate::ChangeMonitorReport> {
401 let session = ChangeMonitorSession::prepare(self.root(), options.fsmonitor);
402 let report = session.report();
403 session.persist()?;
404 Ok(report)
405 }
406}
407
408#[derive(Default)]
409struct TreeBuildState {
410 entries: Vec<TreeEntry>,
411 profile: TreeBuildProfile,
412}
413
414struct TreeBuildPolicy<'a> {
415 repo: &'a Repository,
416 walk_root: &'a Path,
419 stat_cache: Option<&'a crate::thread_manifest::ThreadManifest>,
424 stat_cache_hits: u64,
425 pending_blobs: Vec<(ContentHash, Vec<u8>)>,
430 seen: HashSet<ContentHash>,
433}
434
435impl<'a> TreeBuildPolicy<'a> {
436 fn new(
437 repo: &'a Repository,
438 walk_root: &'a Path,
439 stat_cache: Option<&'a crate::thread_manifest::ThreadManifest>,
440 ) -> Self {
441 Self {
442 repo,
443 walk_root,
444 stat_cache,
445 stat_cache_hits: 0,
446 pending_blobs: Vec::new(),
447 seen: HashSet::new(),
448 }
449 }
450
451 fn lookup_stat_cache_hash(&self, entry: &WalkEntry<'_>) -> Option<ContentHash> {
457 let cache = self.stat_cache?;
458 let rel = entry.path.strip_prefix(self.walk_root).ok()?;
459 let mut rel_str = String::with_capacity(rel.as_os_str().len());
462 for (i, component) in rel.components().enumerate() {
463 let std::path::Component::Normal(s) = component else {
464 return None;
465 };
466 if i > 0 {
467 rel_str.push('/');
468 }
469 rel_str.push_str(s.to_str()?);
470 }
471 let cached = cache.files.get(&rel_str)?;
472 let (size, inode, mtime_ns, ctime_ns, mode) =
473 crate::stat_signature::stat_signature(entry.path, &entry.metadata);
474 let stat = crate::thread_manifest::ManifestFile {
475 hash: cached.hash,
476 size,
477 inode,
478 mtime_ns,
479 ctime_ns,
480 mode,
481 };
482 if stat.matches(cached) {
483 Some(cached.hash)
484 } else {
485 None
486 }
487 }
488
489 fn enqueue_blob(&mut self, blob: Blob, hash: ContentHash) -> Result<()> {
494 if self.seen.contains(&hash) {
495 return Ok(());
496 }
497 if self.repo.store.has_blob(&hash)? {
498 self.seen.insert(hash);
499 return Ok(());
500 }
501 self.seen.insert(hash);
502 self.pending_blobs.push((hash, blob.into_content()));
503 Ok(())
504 }
505}
506
507impl WorktreeWalkPolicy for TreeBuildPolicy<'_> {
508 type DirectoryState = TreeBuildState;
509 type Output = TreeBuildOutput;
510
511 fn enter_directory(
512 &mut self,
513 _directory: &WalkDirectory<'_>,
514 _tree: Option<&Tree>,
515 ) -> Result<Self::DirectoryState> {
516 Ok(TreeBuildState::default())
517 }
518
519 fn visit_file(
520 &mut self,
521 entry: WalkEntry<'_>,
522 tree_entry: Option<&TreeEntry>,
523 state: &mut Self::DirectoryState,
524 ) -> Result<()> {
525 trace!(file = %entry.path.display(), size = entry.metadata.len(), "Processing file");
526
527 if let Some(target) = tree_entry.and_then(TreeEntry::gitlink_target) {
528 let read_start = Instant::now();
529 let (blob, hash) = read_blob_with_hash(entry.path, entry.metadata.len())?;
530 let read_elapsed = read_start.elapsed().as_millis();
531 if blob.content() == gitlink_placeholder_bytes(&target) {
532 state.profile.file_count += 1;
533 state.profile.blob_prep_ms += read_elapsed;
534 state
535 .entries
536 .push(TreeEntry::gitlink(entry.name.to_string(), target)?);
537 return Ok(());
538 }
539
540 let enqueue_start = Instant::now();
541 self.enqueue_blob(blob, hash)?;
542 let enqueue_elapsed = enqueue_start.elapsed().as_millis();
543 state.profile.file_count += 1;
544 state.profile.blob_prep_ms += read_elapsed;
545 state.profile.blob_write_ms += enqueue_elapsed;
546 state.entries.push(TreeEntry::file(
547 entry.name.to_string(),
548 hash,
549 entry.executable,
550 )?);
551 return Ok(());
552 }
553
554 if let Some(hash) = self.lookup_stat_cache_hash(&entry) {
561 self.stat_cache_hits += 1;
562 state.profile.file_count += 1;
563 state.entries.push(TreeEntry::file(
564 entry.name.to_string(),
565 hash,
566 entry.executable,
567 )?);
568 return Ok(());
569 }
570
571 let read_start = Instant::now();
572 let (blob, hash) = read_blob_with_hash(entry.path, entry.metadata.len())?;
573 let read_elapsed = read_start.elapsed().as_millis();
574 trace!(duration_ms = read_elapsed, "File read complete");
575
576 let enqueue_start = Instant::now();
581 self.enqueue_blob(blob, hash)?;
582 let enqueue_elapsed = enqueue_start.elapsed().as_millis();
583
584 state.profile.file_count += 1;
585 state.profile.blob_prep_ms += read_elapsed;
586 state.profile.blob_write_ms += enqueue_elapsed;
587 state.entries.push(TreeEntry::file(
588 entry.name.to_string(),
589 hash,
590 entry.executable,
591 )?);
592 Ok(())
593 }
594
595 fn visit_symlink(
596 &mut self,
597 entry: WalkEntry<'_>,
598 _tree_entry: Option<&TreeEntry>,
599 state: &mut Self::DirectoryState,
600 ) -> Result<()> {
601 let target = fs::read_link(entry.path)?;
602 let symlink_dir = entry.path.parent().unwrap_or(self.walk_root);
614 if !validate_symlink_target(self.walk_root, symlink_dir, &target) {
615 return Err(HeddleError::InvalidSymlinkTarget(target));
616 }
617
618 let blob = Blob::new(objects::util::symlink_target_bytes(&target));
619 let hash = blob.hash();
620 let enqueue_start = Instant::now();
621 self.enqueue_blob(blob, hash)?;
622 state.profile.blob_write_ms += enqueue_start.elapsed().as_millis();
623 state
624 .entries
625 .push(TreeEntry::symlink(entry.name.to_string(), hash)?);
626 Ok(())
627 }
628
629 fn visit_directory_output(
630 &mut self,
631 entry: WalkEntry<'_>,
632 _tree_entry: Option<&TreeEntry>,
633 subtree: TreeBuildOutput,
634 state: &mut Self::DirectoryState,
635 ) -> Result<()> {
636 trace!(dir = %entry.path.display(), "Processing directory");
637 state.profile.blob_prep_ms += subtree.profile.blob_prep_ms;
638 state.profile.blob_write_ms += subtree.profile.blob_write_ms;
639 state.profile.tree_write_ms += subtree.profile.tree_write_ms;
640 state.profile.file_count += subtree.profile.file_count;
641 state.profile.dir_count += subtree.profile.dir_count + 1;
642 let store_start = Instant::now();
643 let hash = self.repo.store.put_tree(&subtree.tree)?;
644 state.profile.tree_write_ms += store_start.elapsed().as_millis();
645 state
646 .entries
647 .push(TreeEntry::directory(entry.name.to_string(), hash)?);
648 Ok(())
649 }
650
651 fn visit_missing(
652 &mut self,
653 _rel_path: &Path,
654 _tree_entry: &TreeEntry,
655 _state: &mut Self::DirectoryState,
656 ) -> Result<()> {
657 Ok(())
658 }
659
660 fn leave_directory(
661 &mut self,
662 directory: &WalkDirectory<'_>,
663 _tree: Option<&Tree>,
664 state: Self::DirectoryState,
665 ) -> Result<TreeBuildOutput> {
666 debug!(
667 dir = %self.repo.root().join(directory.rel_path).display(),
668 files = state.profile.file_count,
669 dirs = state.profile.dir_count,
670 "Directory processed"
671 );
672 Ok(TreeBuildOutput {
673 tree: Tree::from_entries(state.entries),
674 profile: state.profile,
675 })
676 }
677}
678
#[cfg(test)]
mod tests {
    use objects::object::ContentHash;
    use tempfile::TempDir;

    use crate::worktree_walk::{read_blob_with_hash, read_file_hash};

    /// Creates `file.txt` containing `abc`, records that size, then rewrites the file as
    /// `abcdef` so the recorded size is stale. The `TempDir` is returned to keep the file alive.
    fn file_grown_after_stat() -> (TempDir, std::path::PathBuf, u64) {
        let temp_dir = TempDir::new().unwrap();
        let path = temp_dir.path().join("file.txt");

        std::fs::write(&path, b"abc").unwrap();
        let stale_size = std::fs::metadata(&path).unwrap().len();
        std::fs::write(&path, b"abcdef").unwrap();

        (temp_dir, path, stale_size)
    }

    /// The blob must contain everything actually read, not just the stale size hint.
    #[test]
    fn read_blob_with_hash_uses_bytes_read_when_file_grows() {
        let (_temp_dir, path, stale_size) = file_grown_after_stat();

        let (blob, hash) = read_blob_with_hash(&path, stale_size).unwrap();

        assert_eq!(blob.content(), b"abcdef");
        assert_eq!(hash, blob.hash());
    }

    /// The hash must cover everything actually read, not just the stale size hint.
    #[test]
    fn read_file_hash_uses_bytes_read_when_file_grows() {
        let (_temp_dir, path, stale_size) = file_grown_after_stat();

        let hash = read_file_hash(&path, stale_size).unwrap();

        assert_eq!(hash, ContentHash::compute_typed("blob", b"abcdef"));
    }
}