Skip to main content

wtg_cli/
git.rs

1use std::{
2    collections::HashSet,
3    fs,
4    io::{Error as IoError, ErrorKind},
5    path::{Path, PathBuf},
6    process::{Command, Stdio},
7    sync::{Arc, Mutex},
8};
9
10use chrono::{DateTime, TimeZone, Utc};
11use git2::{Commit, FetchOptions, Oid, RemoteCallbacks, Repository};
12
13use crate::error::{WtgError, WtgResult};
14use crate::github::{GhRepoInfo, ReleaseInfo};
15use crate::notice::{Notice, NoticeCallback, no_notices};
16use crate::parse_input::parse_github_repo_url;
17use crate::remote::{RemoteHost, RemoteInfo, RemoteKind};
18pub use crate::semver::{SemverInfo, parse_semver};
19
/// Tracks what data has been synchronized from remote.
///
/// This helps avoid redundant network calls:
/// - If `full_metadata_synced`, we've done a filter clone or full fetch, so all refs are known
/// - If a commit is in `fetched_commits`, we've already fetched it individually
/// - If `tags_synced`, we've fetched all tags
///
/// The state lives behind a `Mutex` inside `GitRepo`, so it can be updated
/// through a shared `&self` reference.
#[derive(Default)]
struct FetchState {
    /// True if we did a full metadata fetch (filter clone or fetch --all)
    full_metadata_synced: bool,
    /// Specific commits we've fetched individually.
    ///
    /// Entries are the hash strings exactly as passed to `GitRepo::find_commit`.
    /// A hash is also recorded when the remote lookup reported that it does not
    /// exist, so the same miss is not retried.
    fetched_commits: HashSet<String>,
    /// True if we've fetched all tags
    tags_synced: bool,
}
35
/// Handle to a git repository plus the bookkeeping needed to lazily fetch
/// missing data from its remote.
pub struct GitRepo {
    /// Underlying `git2` repository; all access goes through `with_repo`,
    /// which takes this lock.
    repo: Arc<Mutex<Repository>>,
    /// Value of `Repository::path()` captured when the repository was opened.
    path: PathBuf,
    /// Remote URL for fetching
    remote_url: Option<String>,
    /// GitHub repository info (owner/repo) if explicitly set
    gh_repo_info: Option<GhRepoInfo>,
    /// Whether fetching is allowed
    allow_fetch: bool,
    /// Tracks what's been synced from remote
    fetch_state: Mutex<FetchState>,
    /// Callback for emitting notices
    notice_cb: NoticeCallback,
}
50
/// Summary of a single commit.
///
/// When built by `GitRepo::commit_to_info`, only the git-derived fields are
/// populated; `commit_url`, `author_login` and `author_url` start as `None`.
#[derive(Debug, Clone)]
pub struct CommitInfo {
    /// Full commit id as a hex string.
    pub hash: String,
    /// First 7 characters of `hash`.
    pub short_hash: String,
    /// First line of the commit message (empty if the message is unavailable).
    pub message: String,
    /// Number of lines in the full commit message.
    pub message_lines: usize,
    /// URL of the commit, if known.
    pub commit_url: Option<String>,
    /// Author name from the commit signature (`"Unknown"` if unavailable).
    pub author_name: String,
    /// Author email from the commit signature, if available.
    pub author_email: Option<String>,
    /// Author login, if known.
    pub author_login: Option<String>,
    /// Author profile URL, if known.
    pub author_url: Option<String>,
    /// Commit time converted to UTC.
    pub date: DateTime<Utc>,
}
64
/// History summary for a file, as produced by `GitRepo::find_file_on_branch`.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file, exactly as it was passed to the lookup.
    pub path: String,
    /// Most recent commit (walking back from the branch tip) that touched the file.
    pub last_commit: CommitInfo,
    /// Distinct authors of earlier commits that touched the file, most recent first.
    pub previous_authors: Vec<(String, String, String)>, // (hash, name, email)
}
71
/// A git tag, optionally enriched with GitHub release metadata.
///
/// Tags built purely from git data (`GitRepo::get_tags`) have `is_release`
/// set to `false` and every release/URL field set to `None`.
#[derive(Debug, Clone)]
pub struct TagInfo {
    /// Tag name as listed by the repository.
    pub name: String,
    /// Hex id of the commit the tag peels to.
    pub commit_hash: String,
    /// Result of `parse_semver` on the tag name; `None` if it did not parse.
    pub semver_info: Option<SemverInfo>,
    pub created_at: DateTime<Utc>, // Timestamp of the commit the tag points to
    pub is_release: bool,          // Whether this is a GitHub release
    pub release_name: Option<String>, // GitHub release name (if is_release)
    pub release_url: Option<String>, // GitHub release URL (if is_release)
    pub published_at: Option<DateTime<Utc>>, // GitHub release published date (if is_release)
    pub tag_url: Option<String>, // URL to view the tag (tree for plain tags, release page for releases)
}
84
85impl TagInfo {
86    /// Whether this is a semver tag
87    #[must_use]
88    pub const fn is_semver(&self) -> bool {
89        self.semver_info.is_some()
90    }
91
92    /// Whether this tag represents a stable release (no pre-release, no build metadata)
93    #[must_use]
94    pub const fn is_stable_semver(&self) -> bool {
95        if let Some(semver) = &self.semver_info {
96            semver.pre_release.is_none()
97                && semver.build_metadata.is_none()
98                && semver.build.is_none()
99        } else {
100            false
101        }
102    }
103}
104
105impl GitRepo {
106    /// Open the git repository from the current directory.
107    /// Fetch is disabled by default for local repos.
108    pub fn open() -> WtgResult<Self> {
109        let repo = Repository::discover(".").map_err(|_| WtgError::NotInGitRepo)?;
110        let path = repo.path().to_path_buf();
111        let remote_url = Self::extract_remote_url(&repo);
112        Ok(Self {
113            repo: Arc::new(Mutex::new(repo)),
114            path,
115            remote_url,
116            gh_repo_info: None,
117            allow_fetch: false,
118            fetch_state: Mutex::new(FetchState::default()),
119            notice_cb: no_notices(),
120        })
121    }
122
123    /// Open the git repository from a specific path.
124    /// Fetch is disabled by default.
125    pub fn from_path(path: &Path) -> WtgResult<Self> {
126        let repo = Repository::open(path).map_err(|_| WtgError::NotInGitRepo)?;
127        let repo_path = repo.path().to_path_buf();
128        let remote_url = Self::extract_remote_url(&repo);
129        Ok(Self {
130            repo: Arc::new(Mutex::new(repo)),
131            path: repo_path,
132            remote_url,
133            gh_repo_info: None,
134            allow_fetch: false,
135            fetch_state: Mutex::new(FetchState::default()),
136            notice_cb: no_notices(),
137        })
138    }
139
140    /// Open or clone a remote GitHub repository.
141    /// Uses a cache directory (~/.cache/wtg/repos). Fetch is enabled by default.
142    pub fn remote(gh_repo_info: GhRepoInfo) -> WtgResult<Self> {
143        Self::remote_with_notices(gh_repo_info, no_notices())
144    }
145
146    /// Open or clone a remote GitHub repository with a notice callback.
147    /// Uses a cache directory (~/.cache/wtg/repos). Fetch is enabled by default.
148    pub fn remote_with_notices(
149        gh_repo_info: GhRepoInfo,
150        notice_cb: NoticeCallback,
151    ) -> WtgResult<Self> {
152        let emit = |n: Notice| (notice_cb)(n);
153
154        let cache_dir = get_cache_dir()?;
155        let repo_cache_path =
156            cache_dir.join(format!("{}/{}", gh_repo_info.owner(), gh_repo_info.repo()));
157
158        // Check if already cloned
159        let full_metadata_synced =
160            if repo_cache_path.exists() && Repository::open(&repo_cache_path).is_ok() {
161                // Cache exists - try to fetch to ensure metadata is fresh
162                match update_remote_repo(&repo_cache_path, &emit) {
163                    Ok(()) => true,
164                    Err(e) => {
165                        emit(Notice::CacheUpdateFailed {
166                            error: e.to_string(),
167                        });
168                        false // Continue with stale cache
169                    }
170                }
171            } else {
172                // Clone it (with filter=blob:none for efficiency)
173                clone_remote_repo(
174                    gh_repo_info.owner(),
175                    gh_repo_info.repo(),
176                    &repo_cache_path,
177                    &emit,
178                )?;
179                true // Fresh clone has all metadata
180            };
181
182        let repo = Repository::open(&repo_cache_path).map_err(|_| WtgError::NotInGitRepo)?;
183        let path = repo.path().to_path_buf();
184        let remote_url = Some(format!(
185            "https://github.com/{}/{}.git",
186            gh_repo_info.owner(),
187            gh_repo_info.repo()
188        ));
189
190        Ok(Self {
191            repo: Arc::new(Mutex::new(repo)),
192            path,
193            remote_url,
194            gh_repo_info: Some(gh_repo_info),
195            allow_fetch: true,
196            fetch_state: Mutex::new(FetchState {
197                full_metadata_synced,
198                ..Default::default()
199            }),
200            notice_cb,
201        })
202    }
203
204    /// Get the repository path
205    #[must_use]
206    pub fn path(&self) -> &Path {
207        &self.path
208    }
209
210    /// Check if this is a shallow repository (internal use only)
211    fn is_shallow(&self) -> bool {
212        self.with_repo(git2::Repository::is_shallow)
213    }
214
215    /// Get the remote URL for fetching
216    #[must_use]
217    pub fn remote_url(&self) -> Option<&str> {
218        self.remote_url.as_deref()
219    }
220
221    /// Set whether fetching is allowed.
222    /// Use this to enable `--fetch` flag for local repos.
223    pub const fn set_allow_fetch(&mut self, allow: bool) {
224        self.allow_fetch = allow;
225    }
226
227    /// Set the notice callback for emitting operational messages.
228    pub fn set_notice_callback(&mut self, cb: NoticeCallback) {
229        self.notice_cb = cb;
230    }
231
232    /// Emit a notice via the callback.
233    fn emit(&self, notice: Notice) {
234        (self.notice_cb)(notice);
235    }
236
237    /// Get a reference to the stored GitHub repo info (owner/repo) if explicitly set.
238    #[must_use]
239    pub const fn gh_repo_info(&self) -> Option<&GhRepoInfo> {
240        self.gh_repo_info.as_ref()
241    }
242
243    fn with_repo<T>(&self, f: impl FnOnce(&Repository) -> T) -> T {
244        let repo = self.repo.lock().expect("git repository mutex poisoned");
245        f(&repo)
246    }
247
248    /// Collect all remotes from a repository as `RemoteInfo` structs.
249    fn collect_remotes(repo: &Repository) -> Vec<RemoteInfo> {
250        let remote_names: Vec<String> = repo
251            .remotes()
252            .map(|names| names.iter().flatten().map(str::to_string).collect())
253            .unwrap_or_default();
254
255        remote_names
256            .into_iter()
257            .filter_map(|name| {
258                let remote = repo.find_remote(&name).ok()?;
259                let url = remote.url()?.to_string();
260                Some(RemoteInfo {
261                    name: name.clone(),
262                    kind: RemoteKind::from_name(&name),
263                    host: RemoteHost::from_url(&url),
264                    url,
265                })
266            })
267            .collect()
268    }
269
270    /// Extract remote URL from repository, preferring upstream over origin.
271    fn extract_remote_url(repo: &Repository) -> Option<String> {
272        let mut remotes = Self::collect_remotes(repo);
273        remotes.sort_by_key(RemoteInfo::priority);
274        remotes.into_iter().next().map(|r| r.url)
275    }
276
277    /// Find a commit by hash (can be short or full).
278    /// If `allow_fetch` is true and the commit isn't found locally, attempts to fetch it.
279    pub fn find_commit(&self, hash_str: &str) -> WtgResult<Option<CommitInfo>> {
280        // 1. Try local first
281        if let Some(commit) = self.find_commit_local(hash_str) {
282            return Ok(Some(commit));
283        }
284
285        // 2. If we've already synced all metadata, commit doesn't exist
286        {
287            let state = self.fetch_state.lock().expect("fetch state mutex poisoned");
288            if state.full_metadata_synced {
289                return Ok(None);
290            }
291            // Check if we've already tried to fetch this commit
292            if state.fetched_commits.contains(hash_str) {
293                return Ok(None);
294            }
295        }
296
297        // 3. If fetch not allowed, return None
298        if !self.allow_fetch {
299            return Ok(None);
300        }
301
302        // 4. For shallow repos, warn and prefer API fallback to avoid huge downloads
303        if self.is_shallow() {
304            self.emit(Notice::ShallowRepoDetected);
305            return Ok(None);
306        }
307
308        // 5. Need remote URL to fetch
309        let Some(remote_url) = &self.remote_url else {
310            return Ok(None);
311        };
312
313        // 6. Check ls-remote before fetching (avoid downloading if ref doesn't exist)
314        if !ls_remote_ref_exists(remote_url, hash_str)? {
315            // Mark as fetched (attempted) so we don't retry
316            self.fetch_state
317                .lock()
318                .expect("fetch state mutex poisoned")
319                .fetched_commits
320                .insert(hash_str.to_string());
321            return Ok(None);
322        }
323
324        // 7. Fetch the specific commit
325        fetch_commit(&self.path, remote_url, hash_str)?;
326
327        // 8. Mark as fetched
328        self.fetch_state
329            .lock()
330            .expect("fetch state mutex poisoned")
331            .fetched_commits
332            .insert(hash_str.to_string());
333
334        // 9. Retry local lookup
335        Ok(self.find_commit_local(hash_str))
336    }
337
338    /// Find a commit by hash locally only (no fetch).
339    #[must_use]
340    pub fn find_commit_local(&self, hash_str: &str) -> Option<CommitInfo> {
341        self.with_repo(|repo| {
342            if let Ok(oid) = Oid::from_str(hash_str)
343                && let Ok(commit) = repo.find_commit(oid)
344            {
345                return Some(Self::commit_to_info(&commit));
346            }
347
348            if hash_str.len() >= 7
349                && let Ok(obj) = repo.revparse_single(hash_str)
350                && let Ok(commit) = obj.peel_to_commit()
351            {
352                return Some(Self::commit_to_info(&commit));
353            }
354
355            None
356        })
357    }
358
359    pub fn has_path_at_head(&self, path: &str) -> bool {
360        self.with_repo(|repo| {
361            let Ok(head) = repo.head() else {
362                return false;
363            };
364            let Ok(commit) = head.peel_to_commit() else {
365                return false;
366            };
367            let Ok(tree) = commit.tree() else {
368                return false;
369            };
370            tree.get_path(Path::new(path)).is_ok()
371        })
372    }
373
374    pub fn has_tag_named(&self, name: &str) -> bool {
375        self.get_tags().into_iter().any(|tag| tag.name == name)
376    }
377
378    pub fn find_branch_path_match(&self, segments: &[String]) -> Option<(String, Vec<String>)> {
379        // Collect candidates inside the closure to avoid lifetime issues with References
380        let candidates: Vec<(String, Vec<String>)> = self.with_repo(|repo| {
381            let refs = repo.references().ok()?;
382            let mut candidates = Vec::new();
383
384            for reference in refs.flatten() {
385                let Some(name) = reference.name().and_then(|n| n.strip_prefix("refs/heads/"))
386                else {
387                    continue;
388                };
389                let branch_segments: Vec<&str> = name.split('/').collect();
390                if branch_segments.len() > segments.len() {
391                    continue;
392                }
393                let matches_prefix = branch_segments
394                    .iter()
395                    .zip(segments.iter())
396                    .all(|(branch, segment)| *branch == segment.as_str());
397                if matches_prefix {
398                    let remainder: Vec<String> = segments[branch_segments.len()..].to_vec();
399                    candidates.push((name.to_string(), remainder));
400                }
401            }
402            Some(candidates)
403        })?;
404
405        // Filter candidates by checking path existence outside the closure
406        let valid: Vec<_> = candidates
407            .into_iter()
408            .filter(|(branch, remainder)| self.branch_path_exists(branch, remainder))
409            .collect();
410
411        if valid.len() == 1 {
412            return Some(valid.into_iter().next().unwrap());
413        }
414
415        None
416    }
417
418    fn branch_path_exists(&self, branch: &str, segments: &[String]) -> bool {
419        if segments.is_empty() {
420            return false;
421        }
422
423        let mut path = PathBuf::new();
424        for segment in segments {
425            path.push(segment);
426        }
427
428        self.with_repo(|repo| {
429            let Ok(obj) = repo.revparse_single(branch) else {
430                return false;
431            };
432            let Ok(commit) = obj.peel_to_commit() else {
433                return false;
434            };
435            let Ok(tree) = commit.tree() else {
436                return false;
437            };
438            tree.get_path(&path).is_ok()
439        })
440    }
441
442    /// Find a file in the repository
443    #[must_use]
444    pub fn find_file_on_branch(&self, branch: &str, path: &str) -> Option<FileInfo> {
445        self.with_repo(|repo| {
446            let obj = repo.revparse_single(branch).ok()?;
447            let commit = obj.peel_to_commit().ok()?;
448            let mut revwalk = repo.revwalk().ok()?;
449            revwalk.push(commit.id()).ok()?;
450
451            for oid in revwalk {
452                let oid = oid.ok()?;
453                let commit = repo.find_commit(oid).ok()?;
454
455                if commit_touches_file(&commit, path) {
456                    let commit_info = Self::commit_to_info(&commit);
457                    let previous_authors =
458                        Self::get_previous_authors_from(repo, path, &commit, 4, |revwalk| {
459                            revwalk.push(commit.id())
460                        });
461
462                    return Some(FileInfo {
463                        path: path.to_string(),
464                        last_commit: commit_info,
465                        previous_authors,
466                    });
467                }
468            }
469
470            None
471        })
472    }
473
474    fn get_previous_authors_from(
475        repo: &Repository,
476        path: &str,
477        last_commit: &Commit,
478        limit: usize,
479        seed_revwalk: impl FnOnce(&mut git2::Revwalk) -> Result<(), git2::Error>,
480    ) -> Vec<(String, String, String)> {
481        let mut authors = Vec::new();
482        let Ok(mut revwalk) = repo.revwalk() else {
483            return authors;
484        };
485
486        if seed_revwalk(&mut revwalk).is_err() {
487            return authors;
488        }
489
490        let mut found_last = false;
491
492        for oid in revwalk {
493            if authors.len() >= limit {
494                break;
495            }
496
497            let Ok(oid) = oid else { continue };
498
499            let Ok(commit) = repo.find_commit(oid) else {
500                continue;
501            };
502
503            if !found_last {
504                if commit.id() == last_commit.id() {
505                    found_last = true;
506                }
507                continue;
508            }
509
510            if !commit_touches_file(&commit, path) {
511                continue;
512            }
513
514            let author = commit.author();
515            let name = author.name().unwrap_or("Unknown").to_string();
516            let email = author.email().unwrap_or("").to_string();
517
518            // Skip duplicates
519            if !authors.iter().any(|(_, n, e)| *n == name && *e == email) {
520                authors.push((commit.id().to_string(), name, email));
521            }
522        }
523
524        authors
525    }
526
527    /// Get all tags in the repository.
528    #[must_use]
529    pub fn get_tags(&self) -> Vec<TagInfo> {
530        self.with_repo(|repo| {
531            let mut tags = Vec::new();
532
533            if let Ok(tag_names) = repo.tag_names(None) {
534                for tag_name in tag_names.iter().flatten() {
535                    if let Ok(obj) = repo.revparse_single(tag_name)
536                        && let Ok(commit) = obj.peel_to_commit()
537                    {
538                        tags.push(TagInfo {
539                            name: tag_name.to_string(),
540                            commit_hash: commit.id().to_string(),
541                            semver_info: parse_semver(tag_name),
542                            created_at: git_time_to_datetime(commit.time()),
543                            is_release: false,
544                            release_name: None,
545                            release_url: None,
546                            published_at: None,
547                            tag_url: None,
548                        });
549                    }
550                }
551            }
552
553            tags
554        })
555    }
556
557    /// Get commits between two refs (from exclusive, to inclusive).
558    /// Returns commits in reverse chronological order (most recent first).
559    pub fn commits_between(&self, from_ref: &str, to_ref: &str, limit: usize) -> Vec<CommitInfo> {
560        self.with_repo(|repo| {
561            let mut result = Vec::new();
562
563            let Ok(to_obj) = repo.revparse_single(to_ref) else {
564                return result;
565            };
566            let Ok(to_commit) = to_obj.peel_to_commit() else {
567                return result;
568            };
569
570            let Ok(from_obj) = repo.revparse_single(from_ref) else {
571                return result;
572            };
573            let Ok(from_commit) = from_obj.peel_to_commit() else {
574                return result;
575            };
576
577            let Ok(mut revwalk) = repo.revwalk() else {
578                return result;
579            };
580
581            // Walk from to_ref back, stopping at from_ref
582            if revwalk.push(to_commit.id()).is_err() {
583                return result;
584            }
585            if revwalk.hide(from_commit.id()).is_err() {
586                return result;
587            }
588
589            for oid in revwalk.take(limit) {
590                let Ok(oid) = oid else { continue };
591                let Ok(commit) = repo.find_commit(oid) else {
592                    continue;
593                };
594                result.push(Self::commit_to_info(&commit));
595            }
596
597            result
598        })
599    }
600
601    /// Expose tags that contain the specified commit.
602    /// If `allow_fetch` is true, ensures tags are fetched first.
603    pub fn tags_containing_commit(&self, commit_hash: &str) -> Vec<TagInfo> {
604        // Ensure tags are available (fetches if needed)
605        let _ = self.ensure_tags();
606
607        let Ok(commit_oid) = Oid::from_str(commit_hash) else {
608            return Vec::new();
609        };
610
611        self.find_tags_containing_commit(commit_oid)
612            .unwrap_or_default()
613    }
614
615    /// Ensure all tags are available (fetches if needed).
616    fn ensure_tags(&self) -> WtgResult<()> {
617        {
618            let state = self.fetch_state.lock().expect("fetch state mutex poisoned");
619            if state.tags_synced || state.full_metadata_synced {
620                return Ok(());
621            }
622        }
623
624        if !self.allow_fetch {
625            return Ok(()); // Don't fetch if not allowed
626        }
627
628        let Some(remote_url) = &self.remote_url else {
629            return Ok(()); // No remote to fetch from
630        };
631
632        fetch_tags(&self.path, remote_url)?;
633
634        self.fetch_state
635            .lock()
636            .expect("fetch state mutex poisoned")
637            .tags_synced = true;
638
639        Ok(())
640    }
641
642    /// Convert a GitHub release into tag metadata if the tag exists locally.
643    #[must_use]
644    pub fn tag_from_release(&self, release: &ReleaseInfo) -> Option<TagInfo> {
645        self.with_repo(|repo| {
646            let obj = repo.revparse_single(&release.tag_name).ok()?;
647            let commit = obj.peel_to_commit().ok()?;
648            let semver_info = parse_semver(&release.tag_name);
649
650            Some(TagInfo {
651                name: release.tag_name.clone(),
652                commit_hash: commit.id().to_string(),
653                semver_info,
654                is_release: true,
655                release_name: release.name.clone(),
656                release_url: Some(release.url.clone()),
657                published_at: release.published_at,
658                created_at: git_time_to_datetime(commit.time()),
659                tag_url: Some(release.url.clone()),
660            })
661        })
662    }
663
664    /// Check whether a release tag contains the specified commit.
665    #[must_use]
666    pub fn tag_contains_commit(&self, tag_commit_hash: &str, commit_hash: &str) -> bool {
667        let Ok(tag_oid) = Oid::from_str(tag_commit_hash) else {
668            return false;
669        };
670        let Ok(commit_oid) = Oid::from_str(commit_hash) else {
671            return false;
672        };
673
674        self.is_ancestor(commit_oid, tag_oid)
675    }
676
677    /// Find all tags that contain a given commit (git-only, no GitHub enrichment)
678    /// Returns None if no tags contain the commit
679    /// Performance: Filters by timestamp before doing expensive ancestry checks
680    fn find_tags_containing_commit(&self, commit_oid: Oid) -> Option<Vec<TagInfo>> {
681        self.with_repo(|repo| {
682            let target_commit = repo.find_commit(commit_oid).ok()?;
683            let target_timestamp = target_commit.time().seconds();
684
685            let mut containing_tags = Vec::new();
686            let tag_names = repo.tag_names(None).ok()?;
687
688            for tag_name in tag_names.iter().flatten() {
689                if let Ok(obj) = repo.revparse_single(tag_name)
690                    && let Ok(commit) = obj.peel_to_commit()
691                {
692                    let tag_oid = commit.id();
693
694                    // Performance: Skip tags with commits older than target
695                    // (they cannot possibly contain the target commit)
696                    if commit.time().seconds() < target_timestamp {
697                        continue;
698                    }
699
700                    // Check if this tag points to the commit or if the tag is a descendant
701                    if tag_oid == commit_oid
702                        || repo
703                            .graph_descendant_of(tag_oid, commit_oid)
704                            .unwrap_or(false)
705                    {
706                        let semver_info = parse_semver(tag_name);
707
708                        containing_tags.push(TagInfo {
709                            name: tag_name.to_string(),
710                            commit_hash: tag_oid.to_string(),
711                            semver_info,
712                            created_at: git_time_to_datetime(commit.time()),
713                            is_release: false,
714                            release_name: None,
715                            release_url: None,
716                            published_at: None,
717                            tag_url: None,
718                        });
719                    }
720                }
721            }
722
723            if containing_tags.is_empty() {
724                None
725            } else {
726                Some(containing_tags)
727            }
728        })
729    }
730
731    /// Get commit timestamp for sorting (helper)
732    pub(crate) fn get_commit_timestamp(&self, commit_hash: &str) -> i64 {
733        self.with_repo(|repo| {
734            Oid::from_str(commit_hash)
735                .and_then(|oid| repo.find_commit(oid))
736                .map(|c| c.time().seconds())
737                .unwrap_or(0)
738        })
739    }
740
741    /// Check if commit1 is an ancestor of commit2
742    fn is_ancestor(&self, ancestor: Oid, descendant: Oid) -> bool {
743        self.with_repo(|repo| {
744            repo.graph_descendant_of(descendant, ancestor)
745                .unwrap_or(false)
746        })
747    }
748
749    /// Iterate over all remotes in the repository.
750    /// Returns an iterator of `RemoteInfo`.
751    pub fn remotes(&self) -> impl Iterator<Item = RemoteInfo> {
752        self.with_repo(Self::collect_remotes).into_iter()
753    }
754
755    /// Get the GitHub remote info.
756    /// Returns stored `gh_repo_info` if set, otherwise extracts from git remotes
757    /// using the `remotes()` API with priority ordering (upstream > origin > other,
758    /// GitHub remotes first within each kind).
759    #[must_use]
760    pub fn github_remote(&self) -> Option<GhRepoInfo> {
761        // Return stored gh_repo_info if explicitly set (e.g., from remote() constructor)
762        if let Some(info) = &self.gh_repo_info {
763            return Some(info.clone());
764        }
765
766        // Use remotes() API to find the best GitHub remote
767        let mut remotes: Vec<_> = self.remotes().collect();
768        remotes.sort_by_key(RemoteInfo::priority);
769
770        // Find the first GitHub remote and parse its URL
771        remotes
772            .into_iter()
773            .find(|r| r.host == Some(RemoteHost::GitHub))
774            .and_then(|r| parse_github_repo_url(&r.url))
775    }
776
777    /// Convert a `git2::Commit` to `CommitInfo`
778    fn commit_to_info(commit: &Commit) -> CommitInfo {
779        let message = commit.message().unwrap_or("").to_string();
780        let lines: Vec<&str> = message.lines().collect();
781        let message_lines = lines.len();
782        let time = commit.time();
783
784        CommitInfo {
785            hash: commit.id().to_string(),
786            short_hash: commit.id().to_string()[..7].to_string(),
787            message: (*lines.first().unwrap_or(&"")).to_string(),
788            message_lines,
789            commit_url: None,
790            author_name: commit.author().name().unwrap_or("Unknown").to_string(),
791            author_email: commit.author().email().map(str::to_string),
792            author_login: None,
793            author_url: None,
794            date: Utc.timestamp_opt(time.seconds(), 0).unwrap(),
795        }
796    }
797}
798
/// Whether `input`, ignoring surrounding whitespace, consists of 7 to 40
/// ASCII hexadecimal digits (the shape of an abbreviated or full commit hash).
pub(crate) fn looks_like_commit_hash(input: &str) -> bool {
    let candidate = input.trim();
    if !(7..=40).contains(&candidate.len()) {
        return false;
    }
    candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
804
805/// Check if a commit touches a specific file
806fn commit_touches_file(commit: &Commit, path: &str) -> bool {
807    let Ok(tree) = commit.tree() else {
808        return false;
809    };
810
811    let target_path = Path::new(path);
812    let current_entry = tree.get_path(target_path).ok();
813
814    // Root commit: if the file exists now, this commit introduced it
815    if commit.parent_count() == 0 {
816        return current_entry.is_some();
817    }
818
819    for parent in commit.parents() {
820        let Ok(parent_tree) = parent.tree() else {
821            continue;
822        };
823
824        let previous_entry = parent_tree.get_path(target_path).ok();
825        if tree_entries_differ(current_entry.as_ref(), previous_entry.as_ref()) {
826            return true;
827        }
828    }
829
830    false
831}
832
833fn tree_entries_differ(
834    current: Option<&git2::TreeEntry<'_>>,
835    previous: Option<&git2::TreeEntry<'_>>,
836) -> bool {
837    match (current, previous) {
838        (None, None) => false,
839        (Some(_), None) | (None, Some(_)) => true,
840        (Some(current_entry), Some(previous_entry)) => {
841            current_entry.id() != previous_entry.id()
842                || current_entry.filemode() != previous_entry.filemode()
843        }
844    }
845}
846
847/// Convert `git2::Time` to `chrono::DateTime<Utc>`
848#[must_use]
849pub fn git_time_to_datetime(time: git2::Time) -> DateTime<Utc> {
850    Utc.timestamp_opt(time.seconds(), 0).unwrap()
851}
852
853// ========================================
854// Remote/cache helper functions
855// ========================================
856
857/// Get the cache directory for remote repositories
858fn get_cache_dir() -> WtgResult<PathBuf> {
859    let cache_dir = dirs::cache_dir()
860        .ok_or_else(|| {
861            WtgError::Io(IoError::new(
862                ErrorKind::NotFound,
863                "Could not determine cache directory",
864            ))
865        })?
866        .join("wtg")
867        .join("repos");
868
869    if !cache_dir.exists() {
870        fs::create_dir_all(&cache_dir)?;
871    }
872
873    Ok(cache_dir)
874}
875
876/// Clone a remote repository using subprocess with filter=blob:none, falling back to git2 if needed
877fn clone_remote_repo(
878    owner: &str,
879    repo: &str,
880    target_path: &Path,
881    emit: &dyn Fn(Notice),
882) -> WtgResult<()> {
883    // Create parent directory
884    if let Some(parent) = target_path.parent() {
885        fs::create_dir_all(parent)?;
886    }
887
888    let repo_url = format!("https://github.com/{owner}/{repo}.git");
889
890    emit(Notice::CloningRepo {
891        url: repo_url.clone(),
892    });
893
894    // Try subprocess with --filter=blob:none first (requires Git 2.17+)
895    match clone_with_filter(&repo_url, target_path) {
896        Ok(()) => {
897            emit(Notice::CloneSucceeded { used_filter: true });
898            Ok(())
899        }
900        Err(e) => {
901            emit(Notice::CloneFallbackToBare {
902                error: e.to_string(),
903            });
904            // Fall back to git2 bare clone
905            clone_bare_with_git2(&repo_url, target_path, emit)
906        }
907    }
908}
909
910/// Clone with --filter=blob:none using subprocess
911fn clone_with_filter(repo_url: &str, target_path: &Path) -> WtgResult<()> {
912    let output = Command::new("git")
913        .args([
914            "clone",
915            "--filter=blob:none", // Don't download blobs until needed (Git 2.17+)
916            "--bare",             // Bare repository (no working directory)
917            repo_url,
918            target_path.to_str().ok_or_else(|| {
919                WtgError::Io(IoError::new(ErrorKind::InvalidInput, "Invalid path"))
920            })?,
921        ])
922        .output()?;
923
924    if !output.status.success() {
925        let error = String::from_utf8_lossy(&output.stderr);
926        return Err(WtgError::Io(IoError::other(format!(
927            "Failed to clone with filter: {error}"
928        ))));
929    }
930
931    Ok(())
932}
933
934/// Clone bare repository using git2 (fallback)
935fn clone_bare_with_git2(
936    repo_url: &str,
937    target_path: &Path,
938    emit: &dyn Fn(Notice),
939) -> WtgResult<()> {
940    // Clone without progress output for cleaner UX
941    let callbacks = RemoteCallbacks::new();
942
943    let mut fetch_options = FetchOptions::new();
944    fetch_options.remote_callbacks(callbacks);
945
946    // Build the repository with options
947    let mut builder = git2::build::RepoBuilder::new();
948    builder.fetch_options(fetch_options);
949    builder.bare(true); // Bare repository - no working directory, only git metadata
950
951    // Clone the repository as bare
952    // This gets all commits, branches, and tags without checking out files
953    builder.clone(repo_url, target_path)?;
954
955    emit(Notice::CloneSucceeded { used_filter: false });
956
957    Ok(())
958}
959
960/// Update an existing cloned remote repository
961fn update_remote_repo(repo_path: &Path, emit: &dyn Fn(Notice)) -> WtgResult<()> {
962    emit(Notice::UpdatingCache);
963
964    // Try subprocess fetch first (works for both filter and non-filter repos)
965    match fetch_with_subprocess(repo_path) {
966        Ok(()) => {
967            emit(Notice::CacheUpdated);
968            Ok(())
969        }
970        Err(_) => {
971            // Fall back to git2
972            fetch_with_git2(repo_path, emit)
973        }
974    }
975}
976
977/// Fetch updates using subprocess
978fn fetch_with_subprocess(repo_path: &Path) -> WtgResult<()> {
979    let args = build_fetch_args(repo_path)?;
980
981    let output = Command::new("git").args(&args).output()?;
982
983    if !output.status.success() {
984        let error = String::from_utf8_lossy(&output.stderr);
985        return Err(WtgError::Io(IoError::other(format!(
986            "Failed to fetch: {error}"
987        ))));
988    }
989
990    Ok(())
991}
992
993/// Build the arguments passed to `git fetch` when refreshing cached repos.
994fn build_fetch_args(repo_path: &Path) -> WtgResult<Vec<String>> {
995    let repo_path = repo_path
996        .to_str()
997        .ok_or_else(|| WtgError::Io(IoError::new(ErrorKind::InvalidInput, "Invalid path")))?;
998
999    Ok(vec![
1000        "-C".to_string(),
1001        repo_path.to_string(),
1002        "fetch".to_string(),
1003        "--all".to_string(),
1004        "--tags".to_string(),
1005        "--force".to_string(),
1006        "--prune".to_string(),
1007    ])
1008}
1009
1010/// Fetch updates using git2 (fallback)
1011fn fetch_with_git2(repo_path: &Path, emit: &dyn Fn(Notice)) -> WtgResult<()> {
1012    let repo = Repository::open(repo_path)?;
1013
1014    // Find the origin remote
1015    let mut remote = repo
1016        .find_remote("origin")
1017        .or_else(|_| repo.find_remote("upstream"))
1018        .map_err(WtgError::Git)?;
1019
1020    // Fetch without progress output for cleaner UX
1021    let callbacks = RemoteCallbacks::new();
1022    let mut fetch_options = FetchOptions::new();
1023    fetch_options.remote_callbacks(callbacks);
1024
1025    // Fetch all refs
1026    remote.fetch(
1027        &["refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"],
1028        Some(&mut fetch_options),
1029        None,
1030    )?;
1031
1032    emit(Notice::CacheUpdated);
1033
1034    Ok(())
1035}
1036
1037/// Check if a ref exists on remote without fetching (git ls-remote).
1038fn ls_remote_ref_exists(remote_url: &str, ref_spec: &str) -> WtgResult<bool> {
1039    let output = Command::new("git")
1040        .args(["ls-remote", "--exit-code", remote_url, ref_spec])
1041        .stderr(Stdio::null())
1042        .stdout(Stdio::null())
1043        .status();
1044
1045    match output {
1046        Ok(status) => Ok(status.success()),
1047        Err(e) => Err(WtgError::Io(e)),
1048    }
1049}
1050
1051/// Fetch a specific commit by hash.
1052fn fetch_commit(repo_path: &Path, remote_url: &str, hash: &str) -> WtgResult<()> {
1053    let repo_path_str = repo_path
1054        .to_str()
1055        .ok_or_else(|| WtgError::Io(IoError::new(ErrorKind::InvalidInput, "Invalid path")))?;
1056
1057    let output = Command::new("git")
1058        .args(["-C", repo_path_str, "fetch", "--depth=1", remote_url, hash])
1059        .output()?;
1060
1061    if output.status.success() {
1062        Ok(())
1063    } else {
1064        let stderr = String::from_utf8_lossy(&output.stderr);
1065        Err(WtgError::Io(IoError::other(format!(
1066            "Failed to fetch commit {hash}: {stderr}"
1067        ))))
1068    }
1069}
1070
1071/// Fetch all tags from remote.
1072fn fetch_tags(repo_path: &Path, remote_url: &str) -> WtgResult<()> {
1073    let repo_path_str = repo_path
1074        .to_str()
1075        .ok_or_else(|| WtgError::Io(IoError::new(ErrorKind::InvalidInput, "Invalid path")))?;
1076
1077    let output = Command::new("git")
1078        .args([
1079            "-C",
1080            repo_path_str,
1081            "fetch",
1082            "--tags",
1083            "--force",
1084            remote_url,
1085        ])
1086        .output()?;
1087
1088    if output.status.success() {
1089        Ok(())
1090    } else {
1091        let stderr = String::from_utf8_lossy(&output.stderr);
1092        Err(WtgError::Io(IoError::other(format!(
1093            "Failed to fetch tags: {stderr}"
1094        ))))
1095    }
1096}
1097
#[cfg(test)]
mod tests {
    use tempfile::tempdir;

    use super::*;

    /// Builds a small repository history and checks which commit is reported
    /// as the last one touching each path.
    ///
    /// The history covers a content change after a rename, the rename itself
    /// (seen from the old path), a mode-only change, and a deletion, with an
    /// unrelated file committed first as noise.
    #[test]
    fn file_history_tracks_content_and_metadata_changes() {
        const ORIGINAL_PATH: &str = "config/policy.json";
        const RENAMED_PATH: &str = "config/policy-renamed.json";
        const EXECUTABLE_PATH: &str = "scripts/run.sh";
        const DELETED_PATH: &str = "docs/legacy.md";
        const DISTRACTION_PATH: &str = "README.md";

        let temp = tempdir().expect("temp dir");
        let repo = Repository::init(temp.path()).expect("git repo");

        commit_file(&repo, DISTRACTION_PATH, "noise", "add distraction");
        commit_file(&repo, ORIGINAL_PATH, "{\"version\":1}", "seed config");
        commit_file(&repo, ORIGINAL_PATH, "{\"version\":2}", "config tweak");
        let rename_commit = rename_file(&repo, ORIGINAL_PATH, RENAMED_PATH, "rename config");
        let post_rename_commit = commit_file(
            &repo,
            RENAMED_PATH,
            "{\"version\":3}",
            "update renamed config",
        );

        commit_file(
            &repo,
            EXECUTABLE_PATH,
            "#!/bin/sh\nprintf hi\n",
            "add runner",
        );
        let exec_mode_commit = change_file_mode(
            &repo,
            EXECUTABLE_PATH,
            git2::FileMode::BlobExecutable,
            "make runner executable",
        );

        commit_file(&repo, DELETED_PATH, "bye", "add temporary file");
        let delete_commit = delete_file(&repo, DELETED_PATH, "remove temporary file");

        let git_repo = GitRepo::from_path(temp.path()).expect("git repo wrapper");

        // New path: the content edit made after the rename is the latest change.
        let renamed_info = git_repo
            .find_file_on_branch("HEAD", RENAMED_PATH)
            .expect("renamed file info");
        assert_eq!(
            renamed_info.last_commit.hash,
            post_rename_commit.to_string()
        );

        // Old path: the rename removed it, so the rename commit is the latest change.
        let original_info = git_repo
            .find_file_on_branch("HEAD", ORIGINAL_PATH)
            .expect("original file info");
        assert_eq!(original_info.last_commit.hash, rename_commit.to_string());

        // Mode-only change must count as touching the file.
        let exec_info = git_repo
            .find_file_on_branch("HEAD", EXECUTABLE_PATH)
            .expect("exec file info");
        assert_eq!(exec_info.last_commit.hash, exec_mode_commit.to_string());

        // Deleted path: the deleting commit is the latest change.
        let deleted_info = git_repo
            .find_file_on_branch("HEAD", DELETED_PATH)
            .expect("deleted file info");
        assert_eq!(deleted_info.last_commit.hash, delete_commit.to_string());
    }

    /// Write `contents` to `path` in the working directory, stage it, and commit.
    fn commit_file(repo: &Repository, path: &str, contents: &str, message: &str) -> git2::Oid {
        let workdir = repo.workdir().expect("workdir");
        let file_path = workdir.join(path);
        if let Some(parent) = file_path.parent() {
            fs::create_dir_all(parent).expect("create dir");
        }
        fs::write(&file_path, contents).expect("write file");

        let mut index = repo.index().expect("index");
        index.add_path(Path::new(path)).expect("add path");
        write_tree_and_commit(repo, &mut index, message)
    }

    /// Move `from` to `to` on disk and in the index, then commit.
    fn rename_file(repo: &Repository, from: &str, to: &str, message: &str) -> git2::Oid {
        let workdir = repo.workdir().expect("workdir");
        let from_path = workdir.join(from);
        let to_path = workdir.join(to);
        if let Some(parent) = to_path.parent() {
            fs::create_dir_all(parent).expect("create dir");
        }
        fs::rename(&from_path, &to_path).expect("rename file");

        let mut index = repo.index().expect("index");
        index.remove_path(Path::new(from)).expect("remove old path");
        index.add_path(Path::new(to)).expect("add new path");
        write_tree_and_commit(repo, &mut index, message)
    }

    /// Remove `path` from disk (if present) and from the index, then commit.
    fn delete_file(repo: &Repository, path: &str, message: &str) -> git2::Oid {
        let workdir = repo.workdir().expect("workdir");
        let file_path = workdir.join(path);
        if file_path.exists() {
            fs::remove_file(&file_path).expect("remove file");
        }

        let mut index = repo.index().expect("index");
        index.remove_path(Path::new(path)).expect("remove path");
        write_tree_and_commit(repo, &mut index, message)
    }

    /// Commit a change of `path`'s file mode to `mode` without altering its content.
    fn change_file_mode(
        repo: &Repository,
        path: &str,
        mode: git2::FileMode,
        message: &str,
    ) -> git2::Oid {
        let mut index = repo.index().expect("index");
        index.add_path(Path::new(path)).expect("add path");
        force_index_mode(&mut index, path, mode);
        write_tree_and_commit(repo, &mut index, message)
    }

    /// Overwrite the mode of the stage-0 index entry for `path`.
    ///
    /// Panics if the entry is missing: silently skipping the update would
    /// leave the fixture without the mode change the test relies on.
    fn force_index_mode(index: &mut git2::Index, path: &str, mode: git2::FileMode) {
        let mut entry = index
            .get_path(Path::new(path), 0)
            .expect("index entry for path");
        entry.mode = u32::try_from(i32::from(mode)).expect("valid file mode");
        index.add(&entry).expect("re-add entry");
    }

    /// Persist the index, write its tree, and commit it on `HEAD`.
    ///
    /// The current `HEAD` commit (if any) becomes the sole parent, so the
    /// first call produces a root commit.
    fn write_tree_and_commit(
        repo: &Repository,
        index: &mut git2::Index,
        message: &str,
    ) -> git2::Oid {
        index.write().expect("write index");
        let tree_oid = index.write_tree().expect("tree oid");
        let tree = repo.find_tree(tree_oid).expect("tree");
        let sig = test_signature();

        let parents = repo
            .head()
            .ok()
            .and_then(|head| head.target())
            .and_then(|oid| repo.find_commit(oid).ok())
            .into_iter()
            .collect::<Vec<_>>();
        let parent_refs = parents.iter().collect::<Vec<_>>();

        repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &parent_refs)
            .expect("commit")
    }

    /// Fixed author/committer identity used for every test commit.
    fn test_signature() -> git2::Signature<'static> {
        git2::Signature::now("Test User", "tester@example.com").expect("sig")
    }
}
1253}