1use std::{collections::HashSet, fs, path::Path, time::Instant};
5
6use objects::{
7 object::{Blob, ContentHash, Tree, TreeEntry},
8 store::ObjectStore,
9 worktree::WorktreeStatus,
10};
11use tracing::{debug, instrument, trace, warn};
12
13use super::{
14 HeddleError, Repository, Result,
15 repository_worktree_status::{WorktreeStatusDetailed, compare_worktree_with_index_detailed},
16};
17use crate::{
18 FsMonitorSettings, WorktreeIndex, WorktreeStatusOptions,
19 fsmonitor::ChangeMonitorSession,
20 worktree_ignore::WorktreeIgnoreMatcher,
21 worktree_index::{WorktreeIndexLoadStats, WorktreeIndexSaveStats},
22 worktree_walk::{
23 WalkDirectory, WalkEntry, WorktreeWalkPolicy, read_blob_with_hash, validate_symlink_target,
24 walk_worktree,
25 },
26};
27
/// Timing and counter breakdown for one cached worktree comparison.
///
/// Populated by `Repository::compare_worktree_cached_detailed_profiled_with_options`;
/// the two `untracked_flatten*` fields are only filled in by
/// `Repository::compare_worktree_cached_profiled_with_options`.
#[derive(Debug, Clone, Default)]
pub struct WorktreeCompareProfile {
    /// Wall-clock milliseconds spent in `WorktreeIndex::load_profiled`.
    pub index_load_ms: u128,
    /// Copied from the index load stats (`snapshot_load_ms`).
    pub index_snapshot_load_ms: u128,
    /// Copied from the index load stats (`journal_replay_ms`).
    pub index_journal_replay_ms: u128,
    /// Copied from the index load stats (`snapshot_bytes`).
    pub index_snapshot_bytes: u64,
    /// Copied from the index load stats (`journal_bytes`).
    pub index_journal_bytes: u64,
    /// Copied from the index load stats (`journal_ops`).
    pub index_journal_ops: usize,
    /// Wall-clock milliseconds spent in `ChangeMonitorSession::prepare`.
    pub monitor_prepare_ms: u128,
    /// Wall-clock milliseconds spent in `compare_worktree_with_index_detailed`.
    pub compare_ms: u128,
    /// Milliseconds spent saving the index; `0` when the index was not dirty
    /// or the save failed.
    pub index_save_ms: u128,
    /// Copied from the index save stats (`snapshot_write_ms`).
    pub index_snapshot_write_ms: u128,
    /// Copied from the index save stats (`journal_append_ms`).
    pub index_journal_append_ms: u128,
    /// Copied from the index save stats (`snapshot_bytes`).
    pub index_save_snapshot_bytes: u64,
    /// Copied from the index save stats (`journal_bytes`).
    pub index_save_journal_bytes: u64,
    /// Copied from the index save stats (`journal_ops`).
    pub index_save_journal_ops: usize,
    /// Copied from the index save stats (`compacted`).
    pub index_save_compacted: bool,
    /// Wall-clock milliseconds spent in `ChangeMonitorSession::persist`.
    pub monitor_persist_ms: u128,
    /// Milliseconds spent converting the detailed status into the flat
    /// status; `0` when only the detailed status was requested.
    pub untracked_flatten_ms: u128,
    /// Value of `untracked.flattened_path_count()` on the detailed status;
    /// `0` when only the detailed status was requested.
    pub untracked_flattened_paths: usize,
    /// Copied from the compare stats (`tracked_refresh_ms`).
    pub tracked_refresh_ms: u128,
    /// Copied from the compare stats (`untracked_scan_ms`).
    pub untracked_scan_ms: u128,
    /// Copied from the compare stats (`hashing_ms`).
    pub hashing_ms: u128,
    /// Copied from the compare stats (`directory_cache_compare_ms`).
    pub directory_cache_compare_ms: u128,
    /// Copied from the compare stats (`directories_scanned`).
    pub directories_scanned: u64,
    /// Copied from the compare stats (`directories_skipped`).
    pub directories_skipped: u64,
    /// Copied from the compare stats (`files_hashed`).
    pub files_hashed: u64,
    /// Copied from the compare stats (`cache_hits`).
    pub cache_hits: u64,
    /// Copied from the compare stats (`monitor_changed_paths`).
    pub monitor_changed_paths: u64,
    /// Copied from the compare stats (`monitor_skipped_directories`).
    pub monitor_skipped_directories: u64,
}
59
/// Timing and counter breakdown for one tree build.
#[derive(Debug, Clone, Default)]
pub struct TreeBuildProfile {
    /// Wall-clock milliseconds for the whole build, as measured by
    /// `Repository::build_tree_profiled_inner` (nested-exclusion lookup plus
    /// the walk and its final blob flush).
    pub tree_walk_ms: u128,
    /// Accumulated milliseconds spent reading and hashing file contents.
    pub blob_prep_ms: u128,
    /// Accumulated milliseconds spent enqueueing blobs, plus the final
    /// packed flush of pending blobs.
    pub blob_write_ms: u128,
    /// Accumulated milliseconds spent in `store.put_tree`.
    pub tree_write_ms: u128,
    /// Number of regular files visited (symlinks are not counted).
    pub file_count: usize,
    /// Number of subdirectories visited, summed over all nesting levels.
    pub dir_count: usize,
}
69
/// Result of walking one directory during a tree build: the directory's
/// tree plus the profile accumulated for it and everything beneath it.
#[derive(Debug, Clone)]
struct TreeBuildOutput {
    /// Tree assembled from the entries collected for the directory.
    tree: Tree,
    /// Counters and timings accumulated for the directory and its subtrees.
    profile: TreeBuildProfile,
}
75
76impl Repository {
77 #[instrument(skip(self), fields(dir = %dir.display()))]
79 pub fn build_tree(&self, dir: &Path) -> Result<Tree> {
80 self.build_tree_profiled(dir).map(|(tree, _)| tree)
81 }
82
83 pub fn build_tree_with_stat_cache(
98 &self,
99 dir: &Path,
100 manifest: &crate::thread_manifest::ThreadManifest,
101 ) -> Result<Tree> {
102 self.build_tree_profiled_inner(dir, Some(manifest))
103 .map(|(tree, _)| tree)
104 }
105
106 #[instrument(skip(self), fields(dir = %dir.display()))]
107 pub fn build_tree_profiled(&self, dir: &Path) -> Result<(Tree, TreeBuildProfile)> {
108 self.build_tree_profiled_inner(dir, None)
109 }
110
111 #[instrument(skip(self, manifest), fields(dir = %dir.display()))]
119 pub fn build_tree_profiled_with_stat_cache(
120 &self,
121 dir: &Path,
122 manifest: &crate::thread_manifest::ThreadManifest,
123 ) -> Result<(Tree, TreeBuildProfile)> {
124 self.build_tree_profiled_inner(dir, Some(manifest))
125 }
126
127 fn build_tree_profiled_inner(
128 &self,
129 dir: &Path,
130 stat_cache: Option<&crate::thread_manifest::ThreadManifest>,
131 ) -> Result<(Tree, TreeBuildProfile)> {
132 let patterns = self.ignore_patterns()?;
133 debug!(pattern_count = patterns.len(), "Starting tree build");
134 let start = Instant::now();
135 let nested_exclusions = self.nested_thread_worktree_exclusions(dir)?;
136 let tree = self.build_tree_walk(dir, &patterns, nested_exclusions, stat_cache);
137 let elapsed = start.elapsed().as_millis();
138 debug!(duration_ms = elapsed, "Tree build complete");
139 tree.map(|output| {
140 let mut profile = output.profile;
141 profile.tree_walk_ms = elapsed;
142 (output.tree, profile)
143 })
144 }
145
146 #[instrument(skip(self, patterns, nested_exclusions, stat_cache), fields(dir = %dir.display()))]
147 fn build_tree_walk(
148 &self,
149 dir: &Path,
150 patterns: &[String],
151 nested_exclusions: Vec<std::path::PathBuf>,
152 stat_cache: Option<&crate::thread_manifest::ThreadManifest>,
153 ) -> Result<TreeBuildOutput> {
154 let ignore_matcher =
155 WorktreeIgnoreMatcher::new(patterns).with_nested_worktree_exclusions(nested_exclusions);
156 let mut policy = TreeBuildPolicy::new(self, dir, stat_cache);
157 let mut output = walk_worktree(self, dir, &ignore_matcher, None, &mut policy)?;
158
159 if !policy.pending_blobs.is_empty() {
165 let flush_start = Instant::now();
166 let pending = std::mem::take(&mut policy.pending_blobs);
167 self.store.put_blobs_packed(pending)?;
168 output.profile.blob_write_ms += flush_start.elapsed().as_millis();
169 }
170
171 Ok(output)
172 }
173
174 pub fn compare_worktree_cached(&self, tree: &Tree) -> Result<WorktreeStatus> {
176 self.compare_worktree_cached_with_options(tree, &self.default_worktree_status_options())
177 }
178
179 pub fn compare_worktree_cached_detailed(&self, tree: &Tree) -> Result<WorktreeStatusDetailed> {
180 self.compare_worktree_cached_detailed_with_options(
181 tree,
182 &self.default_worktree_status_options(),
183 )
184 }
185
186 pub fn compare_worktree_cached_with_options(
188 &self,
189 tree: &Tree,
190 options: &WorktreeStatusOptions,
191 ) -> Result<WorktreeStatus> {
192 self.compare_worktree_cached_profiled_with_options(tree, options)
193 .map(|(status, _)| status)
194 }
195
196 pub fn compare_worktree_cached_detailed_with_options(
197 &self,
198 tree: &Tree,
199 options: &WorktreeStatusOptions,
200 ) -> Result<WorktreeStatusDetailed> {
201 self.compare_worktree_cached_detailed_profiled_with_options(tree, options)
202 .map(|(status, _)| status)
203 }
204
205 pub fn compare_worktree_cached_profiled_with_options(
206 &self,
207 tree: &Tree,
208 options: &WorktreeStatusOptions,
209 ) -> Result<(WorktreeStatus, WorktreeCompareProfile)> {
210 let (detailed_status, mut profile) =
211 self.compare_worktree_cached_detailed_profiled_with_options(tree, options)?;
212 let flatten_start = Instant::now();
213 let flattened_paths = detailed_status.untracked.flattened_path_count();
214 let mut status = detailed_status.into_flat_status();
215 profile.untracked_flatten_ms = flatten_start.elapsed().as_millis();
216 profile.untracked_flattened_paths = flattened_paths;
217 status.modified.sort();
218 status.added.sort();
219 status.deleted.sort();
220 Ok((status, profile))
221 }
222
223 pub fn compare_worktree_cached_detailed_profiled_with_options(
224 &self,
225 tree: &Tree,
226 options: &WorktreeStatusOptions,
227 ) -> Result<(WorktreeStatusDetailed, WorktreeCompareProfile)> {
228 let index_path = self.worktree_index_path();
229 let load_start = Instant::now();
230 let (mut index, load_stats) = match WorktreeIndex::load_profiled(&index_path) {
231 Ok(result) => result,
232 Err(error) => {
233 warn!(path = %index_path.display(), %error, "Ignoring unreadable worktree index");
234 (WorktreeIndex::new(), WorktreeIndexLoadStats::default())
235 }
236 };
237 let index_load_ms = load_start.elapsed().as_millis();
238
239 let monitor_prepare_start = Instant::now();
240 let monitor = ChangeMonitorSession::prepare(self.root(), options.fsmonitor);
241 let monitor_prepare_ms = monitor_prepare_start.elapsed().as_millis();
242
243 let patterns = self.ignore_patterns()?;
244 let nested_exclusions = self.nested_thread_worktree_exclusions(self.root())?;
245 let ignore_matcher = WorktreeIgnoreMatcher::new(&patterns)
246 .with_nested_worktree_exclusions(nested_exclusions);
247 let compare_start = Instant::now();
248 let (status, stats) = compare_worktree_with_index_detailed(
249 self,
250 tree,
251 &ignore_matcher,
252 &mut index,
253 &monitor,
254 )?;
255 let compare_ms = compare_start.elapsed().as_millis();
256
257 let save_start = Instant::now();
258 let (index_save_ms, save_stats) = if index.is_dirty() {
259 match index.save_profiled(&index_path) {
260 Ok(stats) => {
261 index.mark_clean();
262 (save_start.elapsed().as_millis(), stats)
263 }
264 Err(error) => {
265 warn!(path = %index_path.display(), %error, "Failed to persist worktree index");
266 (0, WorktreeIndexSaveStats::default())
267 }
268 }
269 } else {
270 (0, WorktreeIndexSaveStats::default())
271 };
272
273 let persist_start = Instant::now();
274 if let Err(error) = monitor.persist() {
275 warn!(path = %self.root().display(), %error, "Failed to persist monitor state");
276 }
277 let monitor_persist_ms = persist_start.elapsed().as_millis();
278
279 debug!(
280 index_load_ms,
281 index_snapshot_load_ms = load_stats.snapshot_load_ms,
282 index_journal_replay_ms = load_stats.journal_replay_ms,
283 index_snapshot_bytes = load_stats.snapshot_bytes,
284 index_journal_bytes = load_stats.journal_bytes,
285 index_journal_ops = load_stats.journal_ops,
286 monitor_prepare_ms,
287 compare_ms,
288 index_save_ms,
289 index_snapshot_write_ms = save_stats.snapshot_write_ms,
290 index_journal_append_ms = save_stats.journal_append_ms,
291 index_save_snapshot_bytes = save_stats.snapshot_bytes,
292 index_save_journal_bytes = save_stats.journal_bytes,
293 index_save_journal_ops = save_stats.journal_ops,
294 index_save_compacted = save_stats.compacted,
295 index_save_compact_reason = save_stats.compact_reason.unwrap_or("none"),
296 monitor_persist_ms,
297 tracked_refresh_ms = stats.tracked_refresh_ms,
298 untracked_scan_ms = stats.untracked_scan_ms,
299 untracked_flatten_ms = 0,
300 untracked_flattened_paths = 0,
301 hashing_ms = stats.hashing_ms,
302 directory_cache_compare_ms = stats.directory_cache_compare_ms,
303 directories_scanned = stats.directories_scanned,
304 directories_skipped = stats.directories_skipped,
305 files_hashed = stats.files_hashed,
306 cache_hits = stats.cache_hits,
307 monitor_backend = monitor.backend.unwrap_or("off"),
308 monitor_status = ?monitor.status,
309 monitor_reason = monitor.reason.as_deref().unwrap_or("ready"),
310 monitor_changed_paths = stats.monitor_changed_paths,
311 monitor_skipped_directories = stats.monitor_skipped_directories,
312 "Worktree compare complete"
313 );
314
315 Ok((
316 status,
317 WorktreeCompareProfile {
318 index_load_ms,
319 index_snapshot_load_ms: load_stats.snapshot_load_ms,
320 index_journal_replay_ms: load_stats.journal_replay_ms,
321 index_snapshot_bytes: load_stats.snapshot_bytes,
322 index_journal_bytes: load_stats.journal_bytes,
323 index_journal_ops: load_stats.journal_ops,
324 monitor_prepare_ms,
325 compare_ms,
326 index_save_ms,
327 index_snapshot_write_ms: save_stats.snapshot_write_ms,
328 index_journal_append_ms: save_stats.journal_append_ms,
329 index_save_snapshot_bytes: save_stats.snapshot_bytes,
330 index_save_journal_bytes: save_stats.journal_bytes,
331 index_save_journal_ops: save_stats.journal_ops,
332 index_save_compacted: save_stats.compacted,
333 monitor_persist_ms,
334 untracked_flatten_ms: 0,
335 untracked_flattened_paths: 0,
336 tracked_refresh_ms: stats.tracked_refresh_ms,
337 untracked_scan_ms: stats.untracked_scan_ms,
338 hashing_ms: stats.hashing_ms,
339 directory_cache_compare_ms: stats.directory_cache_compare_ms,
340 directories_scanned: stats.directories_scanned,
341 directories_skipped: stats.directories_skipped,
342 files_hashed: stats.files_hashed,
343 cache_hits: stats.cache_hits,
344 monitor_changed_paths: stats.monitor_changed_paths,
345 monitor_skipped_directories: stats.monitor_skipped_directories,
346 },
347 ))
348 }
349
350 pub fn worktree_is_clean_cached(&self, tree: &Tree) -> Result<bool> {
352 self.worktree_is_clean_cached_with_options(tree, &self.default_worktree_status_options())
353 }
354
355 pub fn worktree_is_clean_cached_with_options(
357 &self,
358 tree: &Tree,
359 options: &WorktreeStatusOptions,
360 ) -> Result<bool> {
361 Ok(self
362 .compare_worktree_cached_detailed_with_options(tree, options)?
363 .is_clean())
364 }
365
366 fn worktree_index_path(&self) -> std::path::PathBuf {
367 self.root.join(".heddle/state").join("index.bin")
368 }
369
370 fn default_worktree_status_options(&self) -> WorktreeStatusOptions {
371 WorktreeStatusOptions {
372 fsmonitor: FsMonitorSettings::from(self.config.worktree.fsmonitor),
373 }
374 }
375
376 pub fn inspect_change_monitor_with_options(
377 &self,
378 options: &WorktreeStatusOptions,
379 ) -> Result<crate::ChangeMonitorReport> {
380 let session = ChangeMonitorSession::prepare(self.root(), options.fsmonitor);
381 let report = session.report();
382 session.persist()?;
383 Ok(report)
384 }
385}
386
/// Per-directory accumulator used while a directory is being walked.
#[derive(Default)]
struct TreeBuildState {
    /// Entries collected so far for the directory being walked.
    entries: Vec<TreeEntry>,
    /// Counters and timings for this directory and its finished subtrees.
    profile: TreeBuildProfile,
}
392
/// Walk policy that turns a worktree walk into stored trees and a queue of
/// blobs still to be written.
struct TreeBuildPolicy<'a> {
    /// Repository whose object store is queried and written.
    repo: &'a Repository,
    /// Root of the walk; used to derive manifest keys and to validate
    /// symlink targets.
    walk_root: &'a Path,
    /// Optional manifest consulted to reuse a file's hash when its stat
    /// signature still matches.
    stat_cache: Option<&'a crate::thread_manifest::ThreadManifest>,
    /// Number of files whose hash was taken from `stat_cache`.
    stat_cache_hits: u64,
    /// Blobs queued for a single packed write after the walk completes.
    pending_blobs: Vec<(ContentHash, Vec<u8>)>,
    /// Hashes already handled during this walk (queued or found in the
    /// store), so each blob is checked at most once.
    seen: HashSet<ContentHash>,
}
413
414impl<'a> TreeBuildPolicy<'a> {
415 fn new(
416 repo: &'a Repository,
417 walk_root: &'a Path,
418 stat_cache: Option<&'a crate::thread_manifest::ThreadManifest>,
419 ) -> Self {
420 Self {
421 repo,
422 walk_root,
423 stat_cache,
424 stat_cache_hits: 0,
425 pending_blobs: Vec::new(),
426 seen: HashSet::new(),
427 }
428 }
429
430 fn lookup_stat_cache_hash(&self, entry: &WalkEntry<'_>) -> Option<ContentHash> {
436 let cache = self.stat_cache?;
437 let rel = entry.path.strip_prefix(self.walk_root).ok()?;
438 let mut rel_str = String::with_capacity(rel.as_os_str().len());
441 for (i, component) in rel.components().enumerate() {
442 let std::path::Component::Normal(s) = component else {
443 return None;
444 };
445 if i > 0 {
446 rel_str.push('/');
447 }
448 rel_str.push_str(s.to_str()?);
449 }
450 let cached = cache.files.get(&rel_str)?;
451 let (size, inode, mtime_ns, ctime_ns, mode) =
452 crate::stat_signature::stat_signature(entry.path, &entry.metadata);
453 let stat = crate::thread_manifest::ManifestFile {
454 hash: cached.hash,
455 size,
456 inode,
457 mtime_ns,
458 ctime_ns,
459 mode,
460 };
461 if stat.matches(cached) {
462 Some(cached.hash)
463 } else {
464 None
465 }
466 }
467
468 fn enqueue_blob(&mut self, blob: Blob, hash: ContentHash) -> Result<()> {
473 if self.seen.contains(&hash) {
474 return Ok(());
475 }
476 if self.repo.store.has_blob(&hash)? {
477 self.seen.insert(hash);
478 return Ok(());
479 }
480 self.seen.insert(hash);
481 self.pending_blobs.push((hash, blob.into_content()));
482 Ok(())
483 }
484}
485
486impl WorktreeWalkPolicy for TreeBuildPolicy<'_> {
487 type DirectoryState = TreeBuildState;
488 type Output = TreeBuildOutput;
489
490 fn enter_directory(
491 &mut self,
492 _directory: &WalkDirectory<'_>,
493 _tree: Option<&Tree>,
494 ) -> Result<Self::DirectoryState> {
495 Ok(TreeBuildState::default())
496 }
497
498 fn visit_file(
499 &mut self,
500 entry: WalkEntry<'_>,
501 _tree_entry: Option<&TreeEntry>,
502 state: &mut Self::DirectoryState,
503 ) -> Result<()> {
504 trace!(file = %entry.path.display(), size = entry.metadata.len(), "Processing file");
505
506 if let Some(hash) = self.lookup_stat_cache_hash(&entry) {
513 self.stat_cache_hits += 1;
514 state.profile.file_count += 1;
515 state.entries.push(TreeEntry::file(
516 entry.name.to_string(),
517 hash,
518 entry.executable,
519 )?);
520 return Ok(());
521 }
522
523 let read_start = Instant::now();
524 let (blob, hash) = read_blob_with_hash(entry.path, entry.metadata.len())?;
525 let read_elapsed = read_start.elapsed().as_millis();
526 trace!(duration_ms = read_elapsed, "File read complete");
527
528 let enqueue_start = Instant::now();
533 self.enqueue_blob(blob, hash)?;
534 let enqueue_elapsed = enqueue_start.elapsed().as_millis();
535
536 state.profile.file_count += 1;
537 state.profile.blob_prep_ms += read_elapsed;
538 state.profile.blob_write_ms += enqueue_elapsed;
539 state.entries.push(TreeEntry::file(
540 entry.name.to_string(),
541 hash,
542 entry.executable,
543 )?);
544 Ok(())
545 }
546
547 fn visit_symlink(
548 &mut self,
549 entry: WalkEntry<'_>,
550 _tree_entry: Option<&TreeEntry>,
551 state: &mut Self::DirectoryState,
552 ) -> Result<()> {
553 let target = fs::read_link(entry.path)?;
554 let symlink_dir = entry.path.parent().unwrap_or(self.walk_root);
566 if !validate_symlink_target(self.walk_root, symlink_dir, &target) {
567 return Err(HeddleError::InvalidSymlinkTarget(target));
568 }
569
570 let blob = Blob::new(objects::util::symlink_target_bytes(&target));
571 let hash = blob.hash();
572 let enqueue_start = Instant::now();
573 self.enqueue_blob(blob, hash)?;
574 state.profile.blob_write_ms += enqueue_start.elapsed().as_millis();
575 state
576 .entries
577 .push(TreeEntry::symlink(entry.name.to_string(), hash)?);
578 Ok(())
579 }
580
581 fn visit_directory_output(
582 &mut self,
583 entry: WalkEntry<'_>,
584 _tree_entry: Option<&TreeEntry>,
585 subtree: TreeBuildOutput,
586 state: &mut Self::DirectoryState,
587 ) -> Result<()> {
588 trace!(dir = %entry.path.display(), "Processing directory");
589 state.profile.blob_prep_ms += subtree.profile.blob_prep_ms;
590 state.profile.blob_write_ms += subtree.profile.blob_write_ms;
591 state.profile.tree_write_ms += subtree.profile.tree_write_ms;
592 state.profile.file_count += subtree.profile.file_count;
593 state.profile.dir_count += subtree.profile.dir_count + 1;
594 let store_start = Instant::now();
595 let hash = self.repo.store.put_tree(&subtree.tree)?;
596 state.profile.tree_write_ms += store_start.elapsed().as_millis();
597 state
598 .entries
599 .push(TreeEntry::directory(entry.name.to_string(), hash)?);
600 Ok(())
601 }
602
603 fn visit_missing(
604 &mut self,
605 _rel_path: &Path,
606 _tree_entry: &TreeEntry,
607 _state: &mut Self::DirectoryState,
608 ) -> Result<()> {
609 Ok(())
610 }
611
612 fn leave_directory(
613 &mut self,
614 directory: &WalkDirectory<'_>,
615 _tree: Option<&Tree>,
616 state: Self::DirectoryState,
617 ) -> Result<TreeBuildOutput> {
618 debug!(
619 dir = %self.repo.root().join(directory.rel_path).display(),
620 files = state.profile.file_count,
621 dirs = state.profile.dir_count,
622 "Directory processed"
623 );
624 Ok(TreeBuildOutput {
625 tree: Tree::from_entries(state.entries),
626 profile: state.profile,
627 })
628 }
629}
630
#[cfg(test)]
mod tests {
    use objects::object::ContentHash;
    use tempfile::TempDir;

    use crate::worktree_walk::{read_blob_with_hash, read_file_hash};

    /// Writes a 3-byte file, records its size, then grows it to 6 bytes.
    ///
    /// Returns the temp dir (kept alive by the caller), the file path, and
    /// the now-stale size.
    fn file_grown_after_stat() -> (TempDir, std::path::PathBuf, u64) {
        let temp_dir = TempDir::new().unwrap();
        let path = temp_dir.path().join("file.txt");

        std::fs::write(&path, b"abc").unwrap();
        let stale_size = std::fs::metadata(&path).unwrap().len();
        std::fs::write(&path, b"abcdef").unwrap();

        (temp_dir, path, stale_size)
    }

    #[test]
    fn read_blob_with_hash_uses_bytes_read_when_file_grows() {
        let (_temp_dir, path, stale_size) = file_grown_after_stat();

        let (blob, hash) = read_blob_with_hash(&path, stale_size).unwrap();

        assert_eq!(blob.content(), b"abcdef");
        assert_eq!(hash, blob.hash());
    }

    #[test]
    fn read_file_hash_uses_bytes_read_when_file_grows() {
        let (_temp_dir, path, stale_size) = file_grown_after_stat();

        let hash = read_file_hash(&path, stale_size).unwrap();

        assert_eq!(hash, ContentHash::compute_typed("blob", b"abcdef"));
    }
}