src2md/git.rs
1//! Git repository cloning support.
2//!
3//! This module provides functionality to clone git repositories into temporary
4//! directories for processing by src2md. It is only available when the `git`
5//! feature is enabled.
6//!
7//! # Example
8//!
9//! ```rust,ignore
10//! use src2md::git::clone_repository;
11//!
//! let repo = clone_repository("https://github.com/user/repo", None)?;
//! // repo.path() points to the cloned repository
//! // repo.temp_dir is dropped when `repo` goes out of scope, cleaning up the clone
15//! ```
16
17use anyhow::{Context, Result};
18use git2::{FetchOptions, RemoteCallbacks, build::RepoBuilder};
19use log::{debug, info};
20use std::path::PathBuf;
21use tempfile::TempDir;
22
23/// Result of cloning a repository.
24///
25/// Contains the temporary directory handle (which cleans up on drop) and
26/// the path to the cloned repository root.
27pub struct ClonedRepo {
28 /// The temporary directory containing the clone.
29 /// Dropping this will delete the cloned repository.
30 pub temp_dir: TempDir,
31 /// Path to the repository root within the temp directory.
32 pub path: PathBuf,
33}
34
35impl ClonedRepo {
36 /// Returns the path to the cloned repository.
37 pub fn path(&self) -> &PathBuf {
38 &self.path
39 }
40}
41
42/// Clones a git repository from the given URL into a temporary directory.
43///
44/// # Arguments
45///
46/// * `url` - The git URL to clone (HTTPS or SSH)
47/// * `branch` - Optional branch name to checkout (defaults to the default branch)
48///
49/// # Returns
50///
51/// A `ClonedRepo` containing the temporary directory and path to the clone.
52/// The temporary directory is automatically cleaned up when `ClonedRepo` is dropped.
53///
54/// # Errors
55///
56/// Returns an error if:
57/// - The URL is invalid
58/// - The repository cannot be cloned (network error, auth failure, etc.)
59/// - The temporary directory cannot be created
60pub fn clone_repository(url: &str, branch: Option<&str>) -> Result<ClonedRepo> {
61 info!("Cloning repository: {}", url);
62
63 // Create a temporary directory for the clone
64 let temp_dir = TempDir::new().context("Failed to create temporary directory for git clone")?;
65
66 let clone_path = temp_dir.path().to_path_buf();
67 debug!("Clone target: {}", clone_path.display());
68
69 // Set up progress callbacks for verbose output
70 let mut callbacks = RemoteCallbacks::new();
71 callbacks.transfer_progress(|progress| {
72 if progress.received_objects() == progress.total_objects() {
73 debug!(
74 "Resolving deltas: {}/{}",
75 progress.indexed_deltas(),
76 progress.total_deltas()
77 );
78 } else {
79 debug!(
80 "Receiving objects: {}/{} ({} bytes)",
81 progress.received_objects(),
82 progress.total_objects(),
83 progress.received_bytes()
84 );
85 }
86 true
87 });
88
89 // Configure fetch options
90 let mut fetch_opts = FetchOptions::new();
91 fetch_opts.remote_callbacks(callbacks);
92 // Note: We do NOT use shallow clone (depth(1)) as it can miss files in some edge cases
93
94 // Build and execute the clone
95 let mut builder = RepoBuilder::new();
96 builder.fetch_options(fetch_opts);
97
98 if let Some(branch_name) = branch {
99 debug!("Checking out branch: {}", branch_name);
100 builder.branch(branch_name);
101 }
102
103 builder
104 .clone(url, &clone_path)
105 .with_context(|| format!("Failed to clone repository: {}", url))?;
106
107 info!("Clone complete: {}", clone_path.display());
108
109 Ok(ClonedRepo {
110 temp_dir,
111 path: clone_path,
112 })
113}
114
/// Extracts the repository name from a git URL.
///
/// Two URL shapes are recognised:
/// - scheme URLs such as `https://github.com/user/repo.git`
/// - scp-style SSH URLs such as `git@github.com:user/repo.git`
///
/// The name is the last path segment with a trailing `.git` removed.
/// Trailing slashes are ignored, so `https://github.com/user/repo/` yields
/// `repo`.
///
/// Returns `None` when the input matches neither shape, when a scheme URL has
/// no path after the host (e.g. `https://github.com`), or when the resulting
/// name would be empty.
///
/// # Examples
///
/// ```rust,ignore
/// assert_eq!(repo_name_from_url("https://github.com/user/repo.git"), Some("repo".to_string()));
/// assert_eq!(repo_name_from_url("https://github.com/user/repo/"), Some("repo".to_string()));
/// assert_eq!(repo_name_from_url("git@github.com:user/repo.git"), Some("repo".to_string()));
/// assert_eq!(repo_name_from_url("https://github.com"), None);
/// ```
pub fn repo_name_from_url(url: &str) -> Option<String> {
    // A trailing slash would otherwise leave an empty final segment.
    let url = url.trim_end_matches('/');

    let path = if let Some((_scheme, rest)) = url.split_once("://") {
        // `rest` is `host[/path]`; without a path the host must not be
        // mistaken for a repository name.
        let (_host, path) = rest.split_once('/')?;
        path
    } else {
        // scp-style SSH URL: everything after the last ':' is the path.
        // Input without any ':' is not a recognised URL.
        let (_host, path) = url.rsplit_once(':')?;
        path
    };

    let last_segment = path.rsplit('/').next()?;
    let name = last_segment.strip_suffix(".git").unwrap_or(last_segment);

    (!name.is_empty()).then(|| name.to_string())
}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks each `(url, expected)` pair against `repo_name_from_url`.
    fn check_all(cases: &[(&str, Option<&str>)]) {
        for &(url, expected) in cases {
            assert_eq!(repo_name_from_url(url), expected.map(String::from));
        }
    }

    #[test]
    fn test_repo_name_from_https_url() {
        check_all(&[
            ("https://github.com/user/myrepo.git", Some("myrepo")),
            ("https://github.com/user/myrepo", Some("myrepo")),
            (
                "https://gitlab.com/group/subgroup/project.git",
                Some("project"),
            ),
        ]);
    }

    #[test]
    fn test_repo_name_from_ssh_url() {
        check_all(&[
            ("git@github.com:user/myrepo.git", Some("myrepo")),
            ("git@github.com:user/myrepo", Some("myrepo")),
        ]);
    }

    #[test]
    fn test_repo_name_invalid_url() {
        check_all(&[("not-a-url", None), ("", None)]);
    }
}