src2md/
git.rs

1//! Git repository cloning support.
2//!
3//! This module provides functionality to clone git repositories into temporary
4//! directories for processing by src2md. It is only available when the `git`
5//! feature is enabled.
6//!
7//! # Example
8//!
9//! ```rust,ignore
10//! use src2md::git::clone_repository;
11//!
//! let cloned = clone_repository("https://github.com/user/repo", None)?;
//! // cloned.path() points to the cloned repository
//! // the clone is deleted when `cloned` (and with it `cloned.temp_dir`) is dropped
15//! ```
16
17use anyhow::{Context, Result};
18use git2::{FetchOptions, RemoteCallbacks, build::RepoBuilder};
19use log::{debug, info};
20use std::path::PathBuf;
21use tempfile::TempDir;
22
23/// Result of cloning a repository.
24///
25/// Contains the temporary directory handle (which cleans up on drop) and
26/// the path to the cloned repository root.
27pub struct ClonedRepo {
28    /// The temporary directory containing the clone.
29    /// Dropping this will delete the cloned repository.
30    pub temp_dir: TempDir,
31    /// Path to the repository root within the temp directory.
32    pub path: PathBuf,
33}
34
35impl ClonedRepo {
36    /// Returns the path to the cloned repository.
37    pub fn path(&self) -> &PathBuf {
38        &self.path
39    }
40}
41
42/// Clones a git repository from the given URL into a temporary directory.
43///
44/// # Arguments
45///
46/// * `url` - The git URL to clone (HTTPS or SSH)
47/// * `branch` - Optional branch name to checkout (defaults to the default branch)
48///
49/// # Returns
50///
51/// A `ClonedRepo` containing the temporary directory and path to the clone.
52/// The temporary directory is automatically cleaned up when `ClonedRepo` is dropped.
53///
54/// # Errors
55///
56/// Returns an error if:
57/// - The URL is invalid
58/// - The repository cannot be cloned (network error, auth failure, etc.)
59/// - The temporary directory cannot be created
60pub fn clone_repository(url: &str, branch: Option<&str>) -> Result<ClonedRepo> {
61    info!("Cloning repository: {}", url);
62
63    // Create a temporary directory for the clone
64    let temp_dir = TempDir::new().context("Failed to create temporary directory for git clone")?;
65
66    let clone_path = temp_dir.path().to_path_buf();
67    debug!("Clone target: {}", clone_path.display());
68
69    // Set up progress callbacks for verbose output
70    let mut callbacks = RemoteCallbacks::new();
71    callbacks.transfer_progress(|progress| {
72        if progress.received_objects() == progress.total_objects() {
73            debug!(
74                "Resolving deltas: {}/{}",
75                progress.indexed_deltas(),
76                progress.total_deltas()
77            );
78        } else {
79            debug!(
80                "Receiving objects: {}/{} ({} bytes)",
81                progress.received_objects(),
82                progress.total_objects(),
83                progress.received_bytes()
84            );
85        }
86        true
87    });
88
89    // Configure fetch options
90    let mut fetch_opts = FetchOptions::new();
91    fetch_opts.remote_callbacks(callbacks);
92    // Note: We do NOT use shallow clone (depth(1)) as it can miss files in some edge cases
93
94    // Build and execute the clone
95    let mut builder = RepoBuilder::new();
96    builder.fetch_options(fetch_opts);
97
98    if let Some(branch_name) = branch {
99        debug!("Checking out branch: {}", branch_name);
100        builder.branch(branch_name);
101    }
102
103    builder
104        .clone(url, &clone_path)
105        .with_context(|| format!("Failed to clone repository: {}", url))?;
106
107    info!("Clone complete: {}", clone_path.display());
108
109    Ok(ClonedRepo {
110        temp_dir,
111        path: clone_path,
112    })
113}
114
/// Extracts the repository name from a git URL.
///
/// The name is the last path segment of the URL with a trailing `.git`
/// removed. Two URL shapes are recognised:
///
/// - scheme URLs containing `://`, e.g. `https://github.com/user/repo.git`
/// - scp-like URLs containing `:`, e.g. `git@github.com:user/repo.git`
///
/// Trailing slashes are ignored, so `https://github.com/user/repo/` yields
/// `repo`.
///
/// # Returns
///
/// `None` when the input matches neither shape, when a scheme URL has no path
/// after the host (e.g. `https://github.com`), or when the resulting name
/// would be empty.
///
/// # Examples
///
/// ```rust,ignore
/// assert_eq!(repo_name_from_url("https://github.com/user/repo.git").as_deref(), Some("repo"));
/// assert_eq!(repo_name_from_url("https://github.com/user/repo/").as_deref(), Some("repo"));
/// assert_eq!(repo_name_from_url("git@github.com:user/repo.git").as_deref(), Some("repo"));
/// assert_eq!(repo_name_from_url("https://github.com"), None);
/// ```
pub fn repo_name_from_url(url: &str) -> Option<String> {
    // A trailing slash would otherwise leave an empty last segment.
    let url = url.trim_end_matches('/');

    let last_segment = if let Some((_scheme, rest)) = url.split_once("://") {
        // `rest` is `host/path...`; require a path so the host itself is
        // never mistaken for a repository name.
        let (_host, path) = rest.split_once('/')?;
        path.rsplit('/').next()?
    } else if let Some((_prefix, path)) = url.rsplit_once(':') {
        // scp-like form: everything after the last `:` is the path.
        path.rsplit('/').next()?
    } else {
        return None;
    };

    // Remove .git suffix if present
    let name = last_segment.strip_suffix(".git").unwrap_or(last_segment);

    (!name.is_empty()).then(|| name.to_string())
}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks each `(url, expected name)` pair against `repo_name_from_url`.
    fn check_all(cases: &[(&str, Option<&str>)]) {
        for &(url, expected) in cases {
            assert_eq!(repo_name_from_url(url), expected.map(String::from));
        }
    }

    /// Scheme URLs: with and without `.git`, and with nested groups.
    #[test]
    fn test_repo_name_from_https_url() {
        check_all(&[
            ("https://github.com/user/myrepo.git", Some("myrepo")),
            ("https://github.com/user/myrepo", Some("myrepo")),
            (
                "https://gitlab.com/group/subgroup/project.git",
                Some("project"),
            ),
        ]);
    }

    /// scp-like URLs: with and without `.git`.
    #[test]
    fn test_repo_name_from_ssh_url() {
        check_all(&[
            ("git@github.com:user/myrepo.git", Some("myrepo")),
            ("git@github.com:user/myrepo", Some("myrepo")),
        ]);
    }

    /// Inputs that match neither URL shape produce no name.
    #[test]
    fn test_repo_name_invalid_url() {
        check_all(&[("not-a-url", None), ("", None)]);
    }
}