wtg_cli/
git.rs

1use std::{
2    collections::HashSet,
3    fs,
4    io::{Error as IoError, ErrorKind},
5    path::{Path, PathBuf},
6    process::{Command, Stdio},
7    sync::{Arc, Mutex},
8};
9
10use chrono::{DateTime, TimeZone, Utc};
11use git2::{Commit, FetchOptions, Oid, RemoteCallbacks, Repository};
12
13use crate::error::{WtgError, WtgResult};
14use crate::github::{GhRepoInfo, ReleaseInfo};
15use crate::notice::{Notice, NoticeCallback, no_notices};
16use crate::parse_input::parse_github_repo_url;
17use crate::remote::{RemoteHost, RemoteInfo, RemoteKind};
18pub use crate::semver::{SemverInfo, parse_semver};
19
/// Tracks what data has been synchronized from remote.
///
/// This helps avoid redundant network calls:
/// - If `full_metadata_synced`, we've done a filter clone or full fetch, so all refs are known
/// - If a commit is in `fetched_commits`, we've already fetched it individually
/// - If `tags_synced`, we've fetched all tags
///
/// The derived `Default` starts with nothing marked as synced.
#[derive(Default)]
struct FetchState {
    /// True if we did a full metadata fetch (filter clone or fetch --all).
    /// When set, `find_commit` and `ensure_tags` skip any further fetching.
    full_metadata_synced: bool,
    /// Hashes for which an individual fetch was already attempted. `find_commit` also
    /// records hashes whose `ls-remote` probe came back negative, so they are not retried.
    fetched_commits: HashSet<String>,
    /// True if we've fetched all tags
    tags_synced: bool,
}
35
/// Handle to a git repository plus the state needed to lazily fetch missing data.
///
/// Built by `open`, `from_path`, `remote` or `remote_with_notices`.
pub struct GitRepo {
    /// Underlying `git2` repository; all access goes through `with_repo`, which locks it.
    repo: Arc<Mutex<Repository>>,
    /// Value of `Repository::path()` captured when the repository was opened.
    path: PathBuf,
    /// Remote URL for fetching
    remote_url: Option<String>,
    /// GitHub repository info (owner/repo) if explicitly set
    gh_repo_info: Option<GhRepoInfo>,
    /// Whether fetching is allowed
    allow_fetch: bool,
    /// Tracks what's been synced from remote
    fetch_state: Mutex<FetchState>,
    /// Callback for emitting notices
    notice_cb: NoticeCallback,
}
50
/// Summary of a single commit as produced by `GitRepo::commit_to_info`.
///
/// The `commit_url`, `author_login` and `author_url` fields are left as `None` by
/// `commit_to_info`; they are presumably filled in later by GitHub enrichment.
#[derive(Debug, Clone)]
pub struct CommitInfo {
    /// Full commit id rendered as a string.
    pub hash: String,
    /// First seven characters of `hash`.
    pub short_hash: String,
    /// First line of the commit message (empty if the message is missing).
    pub message: String,
    /// Number of lines in the full commit message.
    pub message_lines: usize,
    /// Web URL of the commit, when known.
    pub commit_url: Option<String>,
    /// Author name from the commit signature, or `"Unknown"` when unavailable.
    pub author_name: String,
    /// Author email from the commit signature, when available.
    pub author_email: Option<String>,
    /// Hosting-service login of the author, when known.
    pub author_login: Option<String>,
    /// Profile URL of the author, when known.
    pub author_url: Option<String>,
    /// Commit time (`Commit::time()`) converted to UTC.
    pub date: DateTime<Utc>,
}
64
/// Result of looking up a file's history on a branch (see `GitRepo::find_file_on_branch`).
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file, exactly as passed to the lookup.
    pub path: String,
    /// First commit met while walking back from the branch tip that touches the file.
    pub last_commit: CommitInfo,
    /// Distinct earlier authors of the file as `(hash, name, email)` tuples; the hash is
    /// the commit at which that author was first met while walking back.
    pub previous_authors: Vec<(String, String, String)>, // (hash, name, email)
}
71
/// Metadata about a git tag, optionally enriched with GitHub release details.
#[derive(Debug, Clone)]
pub struct TagInfo {
    /// Tag name as reported by the repository (or by the release it was built from).
    pub name: String,
    /// Id of the commit the tag peels to.
    pub commit_hash: String,
    /// Result of `parse_semver` on the tag name; `None` when it is not a semver tag.
    pub semver_info: Option<SemverInfo>,
    /// Timestamp of the commit the tag points to
    pub created_at: DateTime<Utc>,
    /// Whether this is a GitHub release
    pub is_release: bool,
    /// GitHub release name (if `is_release`)
    pub release_name: Option<String>,
    /// GitHub release URL (if `is_release`)
    pub release_url: Option<String>,
    /// GitHub release published date (if `is_release`)
    pub published_at: Option<DateTime<Utc>>,
}
83
84impl TagInfo {
85    /// Whether this is a semver tag
86    #[must_use]
87    pub const fn is_semver(&self) -> bool {
88        self.semver_info.is_some()
89    }
90
91    /// Whether this tag represents a stable release (no pre-release, no build metadata)
92    #[must_use]
93    pub const fn is_stable_semver(&self) -> bool {
94        if let Some(semver) = &self.semver_info {
95            semver.pre_release.is_none()
96                && semver.build_metadata.is_none()
97                && semver.build.is_none()
98        } else {
99            false
100        }
101    }
102}
103
104impl GitRepo {
105    /// Open the git repository from the current directory.
106    /// Fetch is disabled by default for local repos.
107    pub fn open() -> WtgResult<Self> {
108        let repo = Repository::discover(".").map_err(|_| WtgError::NotInGitRepo)?;
109        let path = repo.path().to_path_buf();
110        let remote_url = Self::extract_remote_url(&repo);
111        Ok(Self {
112            repo: Arc::new(Mutex::new(repo)),
113            path,
114            remote_url,
115            gh_repo_info: None,
116            allow_fetch: false,
117            fetch_state: Mutex::new(FetchState::default()),
118            notice_cb: no_notices(),
119        })
120    }
121
122    /// Open the git repository from a specific path.
123    /// Fetch is disabled by default.
124    pub fn from_path(path: &Path) -> WtgResult<Self> {
125        let repo = Repository::open(path).map_err(|_| WtgError::NotInGitRepo)?;
126        let repo_path = repo.path().to_path_buf();
127        let remote_url = Self::extract_remote_url(&repo);
128        Ok(Self {
129            repo: Arc::new(Mutex::new(repo)),
130            path: repo_path,
131            remote_url,
132            gh_repo_info: None,
133            allow_fetch: false,
134            fetch_state: Mutex::new(FetchState::default()),
135            notice_cb: no_notices(),
136        })
137    }
138
139    /// Open or clone a remote GitHub repository.
140    /// Uses a cache directory (~/.cache/wtg/repos). Fetch is enabled by default.
141    pub fn remote(gh_repo_info: GhRepoInfo) -> WtgResult<Self> {
142        Self::remote_with_notices(gh_repo_info, no_notices())
143    }
144
145    /// Open or clone a remote GitHub repository with a notice callback.
146    /// Uses a cache directory (~/.cache/wtg/repos). Fetch is enabled by default.
147    pub fn remote_with_notices(
148        gh_repo_info: GhRepoInfo,
149        notice_cb: NoticeCallback,
150    ) -> WtgResult<Self> {
151        let emit = |n: Notice| (notice_cb)(n);
152
153        let cache_dir = get_cache_dir()?;
154        let repo_cache_path =
155            cache_dir.join(format!("{}/{}", gh_repo_info.owner(), gh_repo_info.repo()));
156
157        // Check if already cloned
158        let full_metadata_synced =
159            if repo_cache_path.exists() && Repository::open(&repo_cache_path).is_ok() {
160                // Cache exists - try to fetch to ensure metadata is fresh
161                match update_remote_repo(&repo_cache_path, &emit) {
162                    Ok(()) => true,
163                    Err(e) => {
164                        emit(Notice::CacheUpdateFailed {
165                            error: e.to_string(),
166                        });
167                        false // Continue with stale cache
168                    }
169                }
170            } else {
171                // Clone it (with filter=blob:none for efficiency)
172                clone_remote_repo(
173                    gh_repo_info.owner(),
174                    gh_repo_info.repo(),
175                    &repo_cache_path,
176                    &emit,
177                )?;
178                true // Fresh clone has all metadata
179            };
180
181        let repo = Repository::open(&repo_cache_path).map_err(|_| WtgError::NotInGitRepo)?;
182        let path = repo.path().to_path_buf();
183        let remote_url = Some(format!(
184            "https://github.com/{}/{}.git",
185            gh_repo_info.owner(),
186            gh_repo_info.repo()
187        ));
188
189        Ok(Self {
190            repo: Arc::new(Mutex::new(repo)),
191            path,
192            remote_url,
193            gh_repo_info: Some(gh_repo_info),
194            allow_fetch: true,
195            fetch_state: Mutex::new(FetchState {
196                full_metadata_synced,
197                ..Default::default()
198            }),
199            notice_cb,
200        })
201    }
202
203    /// Get the repository path
204    #[must_use]
205    pub fn path(&self) -> &Path {
206        &self.path
207    }
208
209    /// Check if this is a shallow repository (internal use only)
210    fn is_shallow(&self) -> bool {
211        self.with_repo(git2::Repository::is_shallow)
212    }
213
214    /// Get the remote URL for fetching
215    #[must_use]
216    pub fn remote_url(&self) -> Option<&str> {
217        self.remote_url.as_deref()
218    }
219
    /// Set whether fetching is allowed.
    ///
    /// Use this to enable the `--fetch` flag for local repos. The flag is consulted by
    /// `find_commit` and `ensure_tags` before they touch the network.
    pub const fn set_allow_fetch(&mut self, allow: bool) {
        self.allow_fetch = allow;
    }
225
    /// Set the notice callback for emitting operational messages.
    ///
    /// Replaces the previously installed callback (the constructors `open` and
    /// `from_path` install the no-op `no_notices()` callback).
    pub fn set_notice_callback(&mut self, cb: NoticeCallback) {
        self.notice_cb = cb;
    }
230
231    /// Emit a notice via the callback.
232    fn emit(&self, notice: Notice) {
233        (self.notice_cb)(notice);
234    }
235
    /// Get a reference to the stored GitHub repo info (owner/repo) if explicitly set.
    ///
    /// Only the `remote`/`remote_with_notices` constructors store this value; for
    /// repositories opened with `open` or `from_path` it is `None` (use `github_remote`
    /// to derive the info from the configured git remotes instead).
    #[must_use]
    pub const fn gh_repo_info(&self) -> Option<&GhRepoInfo> {
        self.gh_repo_info.as_ref()
    }
241
242    fn with_repo<T>(&self, f: impl FnOnce(&Repository) -> T) -> T {
243        let repo = self.repo.lock().expect("git repository mutex poisoned");
244        f(&repo)
245    }
246
247    /// Collect all remotes from a repository as `RemoteInfo` structs.
248    fn collect_remotes(repo: &Repository) -> Vec<RemoteInfo> {
249        let remote_names: Vec<String> = repo
250            .remotes()
251            .map(|names| names.iter().flatten().map(str::to_string).collect())
252            .unwrap_or_default();
253
254        remote_names
255            .into_iter()
256            .filter_map(|name| {
257                let remote = repo.find_remote(&name).ok()?;
258                let url = remote.url()?.to_string();
259                Some(RemoteInfo {
260                    name: name.clone(),
261                    kind: RemoteKind::from_name(&name),
262                    host: RemoteHost::from_url(&url),
263                    url,
264                })
265            })
266            .collect()
267    }
268
269    /// Extract remote URL from repository, preferring upstream over origin.
270    fn extract_remote_url(repo: &Repository) -> Option<String> {
271        let mut remotes = Self::collect_remotes(repo);
272        remotes.sort_by_key(RemoteInfo::priority);
273        remotes.into_iter().next().map(|r| r.url)
274    }
275
276    /// Find a commit by hash (can be short or full).
277    /// If `allow_fetch` is true and the commit isn't found locally, attempts to fetch it.
278    pub fn find_commit(&self, hash_str: &str) -> WtgResult<Option<CommitInfo>> {
279        // 1. Try local first
280        if let Some(commit) = self.find_commit_local(hash_str) {
281            return Ok(Some(commit));
282        }
283
284        // 2. If we've already synced all metadata, commit doesn't exist
285        {
286            let state = self.fetch_state.lock().expect("fetch state mutex poisoned");
287            if state.full_metadata_synced {
288                return Ok(None);
289            }
290            // Check if we've already tried to fetch this commit
291            if state.fetched_commits.contains(hash_str) {
292                return Ok(None);
293            }
294        }
295
296        // 3. If fetch not allowed, return None
297        if !self.allow_fetch {
298            return Ok(None);
299        }
300
301        // 4. For shallow repos, warn and prefer API fallback to avoid huge downloads
302        if self.is_shallow() {
303            self.emit(Notice::ShallowRepoDetected);
304            return Ok(None);
305        }
306
307        // 5. Need remote URL to fetch
308        let Some(remote_url) = &self.remote_url else {
309            return Ok(None);
310        };
311
312        // 6. Check ls-remote before fetching (avoid downloading if ref doesn't exist)
313        if !ls_remote_ref_exists(remote_url, hash_str)? {
314            // Mark as fetched (attempted) so we don't retry
315            self.fetch_state
316                .lock()
317                .expect("fetch state mutex poisoned")
318                .fetched_commits
319                .insert(hash_str.to_string());
320            return Ok(None);
321        }
322
323        // 7. Fetch the specific commit
324        fetch_commit(&self.path, remote_url, hash_str)?;
325
326        // 8. Mark as fetched
327        self.fetch_state
328            .lock()
329            .expect("fetch state mutex poisoned")
330            .fetched_commits
331            .insert(hash_str.to_string());
332
333        // 9. Retry local lookup
334        Ok(self.find_commit_local(hash_str))
335    }
336
337    /// Find a commit by hash locally only (no fetch).
338    #[must_use]
339    pub fn find_commit_local(&self, hash_str: &str) -> Option<CommitInfo> {
340        self.with_repo(|repo| {
341            if let Ok(oid) = Oid::from_str(hash_str)
342                && let Ok(commit) = repo.find_commit(oid)
343            {
344                return Some(Self::commit_to_info(&commit));
345            }
346
347            if hash_str.len() >= 7
348                && let Ok(obj) = repo.revparse_single(hash_str)
349                && let Ok(commit) = obj.peel_to_commit()
350            {
351                return Some(Self::commit_to_info(&commit));
352            }
353
354            None
355        })
356    }
357
358    pub fn has_path_at_head(&self, path: &str) -> bool {
359        self.with_repo(|repo| {
360            let Ok(head) = repo.head() else {
361                return false;
362            };
363            let Ok(commit) = head.peel_to_commit() else {
364                return false;
365            };
366            let Ok(tree) = commit.tree() else {
367                return false;
368            };
369            tree.get_path(Path::new(path)).is_ok()
370        })
371    }
372
373    pub fn has_tag_named(&self, name: &str) -> bool {
374        self.get_tags().into_iter().any(|tag| tag.name == name)
375    }
376
377    pub fn find_branch_path_match(&self, segments: &[String]) -> Option<(String, Vec<String>)> {
378        // Collect candidates inside the closure to avoid lifetime issues with References
379        let candidates: Vec<(String, Vec<String>)> = self.with_repo(|repo| {
380            let refs = repo.references().ok()?;
381            let mut candidates = Vec::new();
382
383            for reference in refs.flatten() {
384                let Some(name) = reference.name().and_then(|n| n.strip_prefix("refs/heads/"))
385                else {
386                    continue;
387                };
388                let branch_segments: Vec<&str> = name.split('/').collect();
389                if branch_segments.len() > segments.len() {
390                    continue;
391                }
392                let matches_prefix = branch_segments
393                    .iter()
394                    .zip(segments.iter())
395                    .all(|(branch, segment)| *branch == segment.as_str());
396                if matches_prefix {
397                    let remainder: Vec<String> = segments[branch_segments.len()..].to_vec();
398                    candidates.push((name.to_string(), remainder));
399                }
400            }
401            Some(candidates)
402        })?;
403
404        // Filter candidates by checking path existence outside the closure
405        let valid: Vec<_> = candidates
406            .into_iter()
407            .filter(|(branch, remainder)| self.branch_path_exists(branch, remainder))
408            .collect();
409
410        if valid.len() == 1 {
411            return Some(valid.into_iter().next().unwrap());
412        }
413
414        None
415    }
416
417    fn branch_path_exists(&self, branch: &str, segments: &[String]) -> bool {
418        if segments.is_empty() {
419            return false;
420        }
421
422        let mut path = PathBuf::new();
423        for segment in segments {
424            path.push(segment);
425        }
426
427        self.with_repo(|repo| {
428            let Ok(obj) = repo.revparse_single(branch) else {
429                return false;
430            };
431            let Ok(commit) = obj.peel_to_commit() else {
432                return false;
433            };
434            let Ok(tree) = commit.tree() else {
435                return false;
436            };
437            tree.get_path(&path).is_ok()
438        })
439    }
440
441    /// Find a file in the repository
442    #[must_use]
443    pub fn find_file_on_branch(&self, branch: &str, path: &str) -> Option<FileInfo> {
444        self.with_repo(|repo| {
445            let obj = repo.revparse_single(branch).ok()?;
446            let commit = obj.peel_to_commit().ok()?;
447            let mut revwalk = repo.revwalk().ok()?;
448            revwalk.push(commit.id()).ok()?;
449
450            for oid in revwalk {
451                let oid = oid.ok()?;
452                let commit = repo.find_commit(oid).ok()?;
453
454                if commit_touches_file(&commit, path) {
455                    let commit_info = Self::commit_to_info(&commit);
456                    let previous_authors =
457                        Self::get_previous_authors_from(repo, path, &commit, 4, |revwalk| {
458                            revwalk.push(commit.id())
459                        });
460
461                    return Some(FileInfo {
462                        path: path.to_string(),
463                        last_commit: commit_info,
464                        previous_authors,
465                    });
466                }
467            }
468
469            None
470        })
471    }
472
473    fn get_previous_authors_from(
474        repo: &Repository,
475        path: &str,
476        last_commit: &Commit,
477        limit: usize,
478        seed_revwalk: impl FnOnce(&mut git2::Revwalk) -> Result<(), git2::Error>,
479    ) -> Vec<(String, String, String)> {
480        let mut authors = Vec::new();
481        let Ok(mut revwalk) = repo.revwalk() else {
482            return authors;
483        };
484
485        if seed_revwalk(&mut revwalk).is_err() {
486            return authors;
487        }
488
489        let mut found_last = false;
490
491        for oid in revwalk {
492            if authors.len() >= limit {
493                break;
494            }
495
496            let Ok(oid) = oid else { continue };
497
498            let Ok(commit) = repo.find_commit(oid) else {
499                continue;
500            };
501
502            if !found_last {
503                if commit.id() == last_commit.id() {
504                    found_last = true;
505                }
506                continue;
507            }
508
509            if !commit_touches_file(&commit, path) {
510                continue;
511            }
512
513            let author = commit.author();
514            let name = author.name().unwrap_or("Unknown").to_string();
515            let email = author.email().unwrap_or("").to_string();
516
517            // Skip duplicates
518            if !authors.iter().any(|(_, n, e)| *n == name && *e == email) {
519                authors.push((commit.id().to_string(), name, email));
520            }
521        }
522
523        authors
524    }
525
526    /// Get all tags in the repository.
527    #[must_use]
528    pub fn get_tags(&self) -> Vec<TagInfo> {
529        self.with_repo(|repo| {
530            let mut tags = Vec::new();
531
532            if let Ok(tag_names) = repo.tag_names(None) {
533                for tag_name in tag_names.iter().flatten() {
534                    if let Ok(obj) = repo.revparse_single(tag_name)
535                        && let Ok(commit) = obj.peel_to_commit()
536                    {
537                        tags.push(TagInfo {
538                            name: tag_name.to_string(),
539                            commit_hash: commit.id().to_string(),
540                            semver_info: parse_semver(tag_name),
541                            created_at: git_time_to_datetime(commit.time()),
542                            is_release: false,
543                            release_name: None,
544                            release_url: None,
545                            published_at: None,
546                        });
547                    }
548                }
549            }
550
551            tags
552        })
553    }
554
555    /// Expose tags that contain the specified commit.
556    /// If `allow_fetch` is true, ensures tags are fetched first.
557    pub fn tags_containing_commit(&self, commit_hash: &str) -> Vec<TagInfo> {
558        // Ensure tags are available (fetches if needed)
559        let _ = self.ensure_tags();
560
561        let Ok(commit_oid) = Oid::from_str(commit_hash) else {
562            return Vec::new();
563        };
564
565        self.find_tags_containing_commit(commit_oid)
566            .unwrap_or_default()
567    }
568
569    /// Ensure all tags are available (fetches if needed).
570    fn ensure_tags(&self) -> WtgResult<()> {
571        {
572            let state = self.fetch_state.lock().expect("fetch state mutex poisoned");
573            if state.tags_synced || state.full_metadata_synced {
574                return Ok(());
575            }
576        }
577
578        if !self.allow_fetch {
579            return Ok(()); // Don't fetch if not allowed
580        }
581
582        let Some(remote_url) = &self.remote_url else {
583            return Ok(()); // No remote to fetch from
584        };
585
586        fetch_tags(&self.path, remote_url)?;
587
588        self.fetch_state
589            .lock()
590            .expect("fetch state mutex poisoned")
591            .tags_synced = true;
592
593        Ok(())
594    }
595
596    /// Convert a GitHub release into tag metadata if the tag exists locally.
597    #[must_use]
598    pub fn tag_from_release(&self, release: &ReleaseInfo) -> Option<TagInfo> {
599        self.with_repo(|repo| {
600            let obj = repo.revparse_single(&release.tag_name).ok()?;
601            let commit = obj.peel_to_commit().ok()?;
602            let semver_info = parse_semver(&release.tag_name);
603
604            Some(TagInfo {
605                name: release.tag_name.clone(),
606                commit_hash: commit.id().to_string(),
607                semver_info,
608                is_release: true,
609                release_name: release.name.clone(),
610                release_url: Some(release.url.clone()),
611                published_at: release.published_at,
612                created_at: git_time_to_datetime(commit.time()),
613            })
614        })
615    }
616
617    /// Check whether a release tag contains the specified commit.
618    #[must_use]
619    pub fn tag_contains_commit(&self, tag_commit_hash: &str, commit_hash: &str) -> bool {
620        let Ok(tag_oid) = Oid::from_str(tag_commit_hash) else {
621            return false;
622        };
623        let Ok(commit_oid) = Oid::from_str(commit_hash) else {
624            return false;
625        };
626
627        self.is_ancestor(commit_oid, tag_oid)
628    }
629
630    /// Find all tags that contain a given commit (git-only, no GitHub enrichment)
631    /// Returns None if no tags contain the commit
632    /// Performance: Filters by timestamp before doing expensive ancestry checks
633    fn find_tags_containing_commit(&self, commit_oid: Oid) -> Option<Vec<TagInfo>> {
634        self.with_repo(|repo| {
635            let target_commit = repo.find_commit(commit_oid).ok()?;
636            let target_timestamp = target_commit.time().seconds();
637
638            let mut containing_tags = Vec::new();
639            let tag_names = repo.tag_names(None).ok()?;
640
641            for tag_name in tag_names.iter().flatten() {
642                if let Ok(obj) = repo.revparse_single(tag_name)
643                    && let Ok(commit) = obj.peel_to_commit()
644                {
645                    let tag_oid = commit.id();
646
647                    // Performance: Skip tags with commits older than target
648                    // (they cannot possibly contain the target commit)
649                    if commit.time().seconds() < target_timestamp {
650                        continue;
651                    }
652
653                    // Check if this tag points to the commit or if the tag is a descendant
654                    if tag_oid == commit_oid
655                        || repo
656                            .graph_descendant_of(tag_oid, commit_oid)
657                            .unwrap_or(false)
658                    {
659                        let semver_info = parse_semver(tag_name);
660
661                        containing_tags.push(TagInfo {
662                            name: tag_name.to_string(),
663                            commit_hash: tag_oid.to_string(),
664                            semver_info,
665                            created_at: git_time_to_datetime(commit.time()),
666                            is_release: false,
667                            release_name: None,
668                            release_url: None,
669                            published_at: None,
670                        });
671                    }
672                }
673            }
674
675            if containing_tags.is_empty() {
676                None
677            } else {
678                Some(containing_tags)
679            }
680        })
681    }
682
683    /// Get commit timestamp for sorting (helper)
684    pub(crate) fn get_commit_timestamp(&self, commit_hash: &str) -> i64 {
685        self.with_repo(|repo| {
686            Oid::from_str(commit_hash)
687                .and_then(|oid| repo.find_commit(oid))
688                .map(|c| c.time().seconds())
689                .unwrap_or(0)
690        })
691    }
692
693    /// Check if commit1 is an ancestor of commit2
694    fn is_ancestor(&self, ancestor: Oid, descendant: Oid) -> bool {
695        self.with_repo(|repo| {
696            repo.graph_descendant_of(descendant, ancestor)
697                .unwrap_or(false)
698        })
699    }
700
701    /// Iterate over all remotes in the repository.
702    /// Returns an iterator of `RemoteInfo`.
703    pub fn remotes(&self) -> impl Iterator<Item = RemoteInfo> {
704        self.with_repo(Self::collect_remotes).into_iter()
705    }
706
707    /// Get the GitHub remote info.
708    /// Returns stored `gh_repo_info` if set, otherwise extracts from git remotes
709    /// using the `remotes()` API with priority ordering (upstream > origin > other,
710    /// GitHub remotes first within each kind).
711    #[must_use]
712    pub fn github_remote(&self) -> Option<GhRepoInfo> {
713        // Return stored gh_repo_info if explicitly set (e.g., from remote() constructor)
714        if let Some(info) = &self.gh_repo_info {
715            return Some(info.clone());
716        }
717
718        // Use remotes() API to find the best GitHub remote
719        let mut remotes: Vec<_> = self.remotes().collect();
720        remotes.sort_by_key(RemoteInfo::priority);
721
722        // Find the first GitHub remote and parse its URL
723        remotes
724            .into_iter()
725            .find(|r| r.host == Some(RemoteHost::GitHub))
726            .and_then(|r| parse_github_repo_url(&r.url))
727    }
728
729    /// Convert a `git2::Commit` to `CommitInfo`
730    fn commit_to_info(commit: &Commit) -> CommitInfo {
731        let message = commit.message().unwrap_or("").to_string();
732        let lines: Vec<&str> = message.lines().collect();
733        let message_lines = lines.len();
734        let time = commit.time();
735
736        CommitInfo {
737            hash: commit.id().to_string(),
738            short_hash: commit.id().to_string()[..7].to_string(),
739            message: (*lines.first().unwrap_or(&"")).to_string(),
740            message_lines,
741            commit_url: None,
742            author_name: commit.author().name().unwrap_or("Unknown").to_string(),
743            author_email: commit.author().email().map(str::to_string),
744            author_login: None,
745            author_url: None,
746            date: Utc.timestamp_opt(time.seconds(), 0).unwrap(),
747        }
748    }
749}
750
/// Returns `true` when `input`, after trimming surrounding whitespace, consists of
/// 7 to 40 ASCII hexadecimal digits, which is the shape of an abbreviated or full commit hash.
pub(crate) fn looks_like_commit_hash(input: &str) -> bool {
    let candidate = input.trim();
    (7..=40).contains(&candidate.len())
        && candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
756
757/// Check if a commit touches a specific file
758fn commit_touches_file(commit: &Commit, path: &str) -> bool {
759    let Ok(tree) = commit.tree() else {
760        return false;
761    };
762
763    let target_path = Path::new(path);
764    let current_entry = tree.get_path(target_path).ok();
765
766    // Root commit: if the file exists now, this commit introduced it
767    if commit.parent_count() == 0 {
768        return current_entry.is_some();
769    }
770
771    for parent in commit.parents() {
772        let Ok(parent_tree) = parent.tree() else {
773            continue;
774        };
775
776        let previous_entry = parent_tree.get_path(target_path).ok();
777        if tree_entries_differ(current_entry.as_ref(), previous_entry.as_ref()) {
778            return true;
779        }
780    }
781
782    false
783}
784
785fn tree_entries_differ(
786    current: Option<&git2::TreeEntry<'_>>,
787    previous: Option<&git2::TreeEntry<'_>>,
788) -> bool {
789    match (current, previous) {
790        (None, None) => false,
791        (Some(_), None) | (None, Some(_)) => true,
792        (Some(current_entry), Some(previous_entry)) => {
793            current_entry.id() != previous_entry.id()
794                || current_entry.filemode() != previous_entry.filemode()
795        }
796    }
797}
798
799/// Convert `git2::Time` to `chrono::DateTime<Utc>`
800#[must_use]
801pub fn git_time_to_datetime(time: git2::Time) -> DateTime<Utc> {
802    Utc.timestamp_opt(time.seconds(), 0).unwrap()
803}
804
805// ========================================
806// Remote/cache helper functions
807// ========================================
808
809/// Get the cache directory for remote repositories
810fn get_cache_dir() -> WtgResult<PathBuf> {
811    let cache_dir = dirs::cache_dir()
812        .ok_or_else(|| {
813            WtgError::Io(IoError::new(
814                ErrorKind::NotFound,
815                "Could not determine cache directory",
816            ))
817        })?
818        .join("wtg")
819        .join("repos");
820
821    if !cache_dir.exists() {
822        fs::create_dir_all(&cache_dir)?;
823    }
824
825    Ok(cache_dir)
826}
827
828/// Clone a remote repository using subprocess with filter=blob:none, falling back to git2 if needed
829fn clone_remote_repo(
830    owner: &str,
831    repo: &str,
832    target_path: &Path,
833    emit: &dyn Fn(Notice),
834) -> WtgResult<()> {
835    // Create parent directory
836    if let Some(parent) = target_path.parent() {
837        fs::create_dir_all(parent)?;
838    }
839
840    let repo_url = format!("https://github.com/{owner}/{repo}.git");
841
842    emit(Notice::CloningRepo {
843        url: repo_url.clone(),
844    });
845
846    // Try subprocess with --filter=blob:none first (requires Git 2.17+)
847    match clone_with_filter(&repo_url, target_path) {
848        Ok(()) => {
849            emit(Notice::CloneSucceeded { used_filter: true });
850            Ok(())
851        }
852        Err(e) => {
853            emit(Notice::CloneFallbackToBare {
854                error: e.to_string(),
855            });
856            // Fall back to git2 bare clone
857            clone_bare_with_git2(&repo_url, target_path, emit)
858        }
859    }
860}
861
862/// Clone with --filter=blob:none using subprocess
863fn clone_with_filter(repo_url: &str, target_path: &Path) -> WtgResult<()> {
864    let output = Command::new("git")
865        .args([
866            "clone",
867            "--filter=blob:none", // Don't download blobs until needed (Git 2.17+)
868            "--bare",             // Bare repository (no working directory)
869            repo_url,
870            target_path.to_str().ok_or_else(|| {
871                WtgError::Io(IoError::new(ErrorKind::InvalidInput, "Invalid path"))
872            })?,
873        ])
874        .output()?;
875
876    if !output.status.success() {
877        let error = String::from_utf8_lossy(&output.stderr);
878        return Err(WtgError::Io(IoError::other(format!(
879            "Failed to clone with filter: {error}"
880        ))));
881    }
882
883    Ok(())
884}
885
886/// Clone bare repository using git2 (fallback)
887fn clone_bare_with_git2(
888    repo_url: &str,
889    target_path: &Path,
890    emit: &dyn Fn(Notice),
891) -> WtgResult<()> {
892    // Clone without progress output for cleaner UX
893    let callbacks = RemoteCallbacks::new();
894
895    let mut fetch_options = FetchOptions::new();
896    fetch_options.remote_callbacks(callbacks);
897
898    // Build the repository with options
899    let mut builder = git2::build::RepoBuilder::new();
900    builder.fetch_options(fetch_options);
901    builder.bare(true); // Bare repository - no working directory, only git metadata
902
903    // Clone the repository as bare
904    // This gets all commits, branches, and tags without checking out files
905    builder.clone(repo_url, target_path)?;
906
907    emit(Notice::CloneSucceeded { used_filter: false });
908
909    Ok(())
910}
911
912/// Update an existing cloned remote repository
913fn update_remote_repo(repo_path: &Path, emit: &dyn Fn(Notice)) -> WtgResult<()> {
914    emit(Notice::UpdatingCache);
915
916    // Try subprocess fetch first (works for both filter and non-filter repos)
917    match fetch_with_subprocess(repo_path) {
918        Ok(()) => {
919            emit(Notice::CacheUpdated);
920            Ok(())
921        }
922        Err(_) => {
923            // Fall back to git2
924            fetch_with_git2(repo_path, emit)
925        }
926    }
927}
928
929/// Fetch updates using subprocess
930fn fetch_with_subprocess(repo_path: &Path) -> WtgResult<()> {
931    let args = build_fetch_args(repo_path)?;
932
933    let output = Command::new("git").args(&args).output()?;
934
935    if !output.status.success() {
936        let error = String::from_utf8_lossy(&output.stderr);
937        return Err(WtgError::Io(IoError::other(format!(
938            "Failed to fetch: {error}"
939        ))));
940    }
941
942    Ok(())
943}
944
945/// Build the arguments passed to `git fetch` when refreshing cached repos.
946fn build_fetch_args(repo_path: &Path) -> WtgResult<Vec<String>> {
947    let repo_path = repo_path
948        .to_str()
949        .ok_or_else(|| WtgError::Io(IoError::new(ErrorKind::InvalidInput, "Invalid path")))?;
950
951    Ok(vec![
952        "-C".to_string(),
953        repo_path.to_string(),
954        "fetch".to_string(),
955        "--all".to_string(),
956        "--tags".to_string(),
957        "--force".to_string(),
958        "--prune".to_string(),
959    ])
960}
961
962/// Fetch updates using git2 (fallback)
963fn fetch_with_git2(repo_path: &Path, emit: &dyn Fn(Notice)) -> WtgResult<()> {
964    let repo = Repository::open(repo_path)?;
965
966    // Find the origin remote
967    let mut remote = repo
968        .find_remote("origin")
969        .or_else(|_| repo.find_remote("upstream"))
970        .map_err(WtgError::Git)?;
971
972    // Fetch without progress output for cleaner UX
973    let callbacks = RemoteCallbacks::new();
974    let mut fetch_options = FetchOptions::new();
975    fetch_options.remote_callbacks(callbacks);
976
977    // Fetch all refs
978    remote.fetch(
979        &["refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"],
980        Some(&mut fetch_options),
981        None,
982    )?;
983
984    emit(Notice::CacheUpdated);
985
986    Ok(())
987}
988
989/// Check if a ref exists on remote without fetching (git ls-remote).
990fn ls_remote_ref_exists(remote_url: &str, ref_spec: &str) -> WtgResult<bool> {
991    let output = Command::new("git")
992        .args(["ls-remote", "--exit-code", remote_url, ref_spec])
993        .stderr(Stdio::null())
994        .stdout(Stdio::null())
995        .status();
996
997    match output {
998        Ok(status) => Ok(status.success()),
999        Err(e) => Err(WtgError::Io(e)),
1000    }
1001}
1002
1003/// Fetch a specific commit by hash.
1004fn fetch_commit(repo_path: &Path, remote_url: &str, hash: &str) -> WtgResult<()> {
1005    let repo_path_str = repo_path
1006        .to_str()
1007        .ok_or_else(|| WtgError::Io(IoError::new(ErrorKind::InvalidInput, "Invalid path")))?;
1008
1009    let output = Command::new("git")
1010        .args(["-C", repo_path_str, "fetch", "--depth=1", remote_url, hash])
1011        .output()?;
1012
1013    if output.status.success() {
1014        Ok(())
1015    } else {
1016        let stderr = String::from_utf8_lossy(&output.stderr);
1017        Err(WtgError::Io(IoError::other(format!(
1018            "Failed to fetch commit {hash}: {stderr}"
1019        ))))
1020    }
1021}
1022
1023/// Fetch all tags from remote.
1024fn fetch_tags(repo_path: &Path, remote_url: &str) -> WtgResult<()> {
1025    let repo_path_str = repo_path
1026        .to_str()
1027        .ok_or_else(|| WtgError::Io(IoError::new(ErrorKind::InvalidInput, "Invalid path")))?;
1028
1029    let output = Command::new("git")
1030        .args([
1031            "-C",
1032            repo_path_str,
1033            "fetch",
1034            "--tags",
1035            "--force",
1036            remote_url,
1037        ])
1038        .output()?;
1039
1040    if output.status.success() {
1041        Ok(())
1042    } else {
1043        let stderr = String::from_utf8_lossy(&output.stderr);
1044        Err(WtgError::Io(IoError::other(format!(
1045            "Failed to fetch tags: {stderr}"
1046        ))))
1047    }
1048}
1049
#[cfg(test)]
mod tests {
    use tempfile::tempdir;

    use super::*;

    /// Builds a throwaway repository containing content edits, a rename, a
    /// mode-only change and a deletion, then checks which commit
    /// `find_file_on_branch("HEAD", ..)` reports as the last one for each path.
    #[test]
    fn file_history_tracks_content_and_metadata_changes() {
        const ORIGINAL_PATH: &str = "config/policy.json";
        const RENAMED_PATH: &str = "config/policy-renamed.json";
        const EXECUTABLE_PATH: &str = "scripts/run.sh";
        const DELETED_PATH: &str = "docs/legacy.md";
        const DISTRACTION_PATH: &str = "README.md";

        let temp = tempdir().expect("temp dir");
        let repo = Repository::init(temp.path()).expect("git repo");

        // Unrelated file plus two content revisions of the config before the rename.
        commit_file(&repo, DISTRACTION_PATH, "noise", "add distraction");
        commit_file(&repo, ORIGINAL_PATH, "{\"version\":1}", "seed config");
        commit_file(&repo, ORIGINAL_PATH, "{\"version\":2}", "config tweak");
        let rename_commit = rename_file(&repo, ORIGINAL_PATH, RENAMED_PATH, "rename config");
        let post_rename_commit = commit_file(
            &repo,
            RENAMED_PATH,
            "{\"version\":3}",
            "update renamed config",
        );

        // Two-line shell script; the follow-up commit changes only its file mode.
        commit_file(
            &repo,
            EXECUTABLE_PATH,
            "#!/bin/sh\nprintf hi\n",
            "add runner",
        );
        let exec_mode_commit = change_file_mode(
            &repo,
            EXECUTABLE_PATH,
            git2::FileMode::BlobExecutable,
            "make runner executable",
        );

        commit_file(&repo, DELETED_PATH, "bye", "add temporary file");
        let delete_commit = delete_file(&repo, DELETED_PATH, "remove temporary file");

        let git_repo = GitRepo::from_path(temp.path()).expect("git repo wrapper");

        // New name: last touched by the edit made after the rename.
        let renamed_info = git_repo
            .find_file_on_branch("HEAD", RENAMED_PATH)
            .expect("renamed file info");
        assert_eq!(
            renamed_info.last_commit.hash,
            post_rename_commit.to_string()
        );

        // Old name: last touched by the rename commit that removed it.
        let original_info = git_repo
            .find_file_on_branch("HEAD", ORIGINAL_PATH)
            .expect("original file info");
        assert_eq!(original_info.last_commit.hash, rename_commit.to_string());

        // A mode-only change is expected to count as a change to the file.
        let exec_info = git_repo
            .find_file_on_branch("HEAD", EXECUTABLE_PATH)
            .expect("exec file info");
        assert_eq!(exec_info.last_commit.hash, exec_mode_commit.to_string());

        // A deleted path is expected to resolve to the commit that deleted it.
        let deleted_info = git_repo
            .find_file_on_branch("HEAD", DELETED_PATH)
            .expect("deleted file info");
        assert_eq!(deleted_info.last_commit.hash, delete_commit.to_string());
    }

    /// Writes `contents` to `path` inside the work tree, stages it and commits.
    /// Returns the id of the new commit.
    fn commit_file(repo: &Repository, path: &str, contents: &str, message: &str) -> git2::Oid {
        let workdir = repo.workdir().expect("workdir");
        let file_path = workdir.join(path);
        if let Some(parent) = file_path.parent() {
            fs::create_dir_all(parent).expect("create dir");
        }
        fs::write(&file_path, contents).expect("write file");

        let mut index = repo.index().expect("index");
        index.add_path(Path::new(path)).expect("add path");
        write_tree_and_commit(repo, &mut index, message)
    }

    /// Moves `from` to `to` on disk and in the index, then commits.
    /// Returns the id of the new commit.
    fn rename_file(repo: &Repository, from: &str, to: &str, message: &str) -> git2::Oid {
        let workdir = repo.workdir().expect("workdir");
        let from_path = workdir.join(from);
        let to_path = workdir.join(to);
        if let Some(parent) = to_path.parent() {
            fs::create_dir_all(parent).expect("create dir");
        }
        fs::rename(&from_path, &to_path).expect("rename file");

        let mut index = repo.index().expect("index");
        index.remove_path(Path::new(from)).expect("remove old path");
        index.add_path(Path::new(to)).expect("add new path");
        write_tree_and_commit(repo, &mut index, message)
    }

    /// Removes `path` from disk (if present) and from the index, then commits.
    /// Returns the id of the new commit.
    fn delete_file(repo: &Repository, path: &str, message: &str) -> git2::Oid {
        let workdir = repo.workdir().expect("workdir");
        let file_path = workdir.join(path);
        if file_path.exists() {
            fs::remove_file(&file_path).expect("remove file");
        }

        let mut index = repo.index().expect("index");
        index.remove_path(Path::new(path)).expect("remove path");
        write_tree_and_commit(repo, &mut index, message)
    }

    /// Re-stages `path`, overrides its mode in the index with `mode` and commits.
    /// Returns the id of the new commit.
    fn change_file_mode(
        repo: &Repository,
        path: &str,
        mode: git2::FileMode,
        message: &str,
    ) -> git2::Oid {
        let mut index = repo.index().expect("index");
        index.add_path(Path::new(path)).expect("add path");
        force_index_mode(&mut index, path, mode);
        write_tree_and_commit(repo, &mut index, message)
    }

    /// Overwrites the mode of the stage-0 index entry for `path`.
    ///
    /// Panics when the entry is missing so a broken fixture cannot silently
    /// produce a commit without the intended mode change.
    fn force_index_mode(index: &mut git2::Index, path: &str, mode: git2::FileMode) {
        let mut entry = index
            .get_path(Path::new(path), 0)
            .expect("index entry for path");
        entry.mode = u32::try_from(i32::from(mode)).expect("valid file mode");
        index.add(&entry).expect("re-add entry");
    }

    /// Persists the index, writes it as a tree and commits that tree on `HEAD`.
    /// The current `HEAD` commit, when there is one, becomes the only parent.
    fn write_tree_and_commit(
        repo: &Repository,
        index: &mut git2::Index,
        message: &str,
    ) -> git2::Oid {
        index.write().expect("write index");
        let tree_oid = index.write_tree().expect("tree oid");
        let tree = repo.find_tree(tree_oid).expect("tree");
        let sig = test_signature();

        // Zero parents for the first commit, otherwise the commit HEAD points at.
        let parents = repo
            .head()
            .ok()
            .and_then(|head| head.target())
            .and_then(|oid| repo.find_commit(oid).ok())
            .into_iter()
            .collect::<Vec<_>>();
        let parent_refs = parents.iter().collect::<Vec<_>>();

        repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &parent_refs)
            .expect("commit")
    }

    /// Fixed author/committer identity used for every test commit.
    fn test_signature() -> git2::Signature<'static> {
        git2::Signature::now("Test User", "tester@example.com").expect("sig")
    }
}