Skip to main content

mdmodels_core/
git.rs

1use std::error::Error;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use reqwest::blocking::Client;
7use reqwest::header::{ACCEPT, USER_AGENT};
8use reqwest::StatusCode;
9use reqwest::Url;
10use serde::Deserialize;
11
/// A GitHub repository checkout stored in the local cache.
///
/// Produced by [`cache_github_repo`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CachedGithubRepo {
    /// Owner (user or organisation) part of the `owner/repo` spec.
    pub owner: String,
    /// Repository name part of the `owner/repo` spec.
    pub name: String,
    /// Ref the commit was resolved from: the one given in the spec, or the
    /// repository's default branch when the spec had none.
    pub reference: String,
    /// Commit SHA reported by the GitHub API for `reference`.
    pub commit: String,
    /// Cache directory for this checkout (`<cache root>/<owner>/<name>/<commit>`).
    pub root: PathBuf,
}
20
/// Parsed form of an `owner/repo[@ref]` repository spec.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GithubRepoSpec {
    /// Text before the `/` in the spec.
    owner: String,
    /// Text after the `/` in the spec.
    name: String,
    /// Text after the last `@`, or `None` when the spec has no `@`.
    reference: Option<String>,
}
27
28#[derive(Debug, Deserialize)]
29struct GithubRepoResponse {
30    default_branch: String,
31}
32
33#[derive(Debug, Deserialize)]
34struct GithubCommitResponse {
35    sha: String,
36}
37
38pub fn cache_github_repo(repo: &str) -> Result<CachedGithubRepo, Box<dyn Error>> {
39    let spec = parse_github_repo_spec(repo)?;
40    let client = github_client()?;
41
42    let reference = match spec.reference {
43        Some(reference) => reference,
44        None => fetch_default_branch(&client, &spec.owner, &spec.name)?,
45    };
46
47    let commit = fetch_commit_sha(&client, &spec.owner, &spec.name, &reference)?;
48    let cache_root = github_cache_root();
49    let target = cache_root.join(&spec.owner).join(&spec.name).join(&commit);
50
51    if !target.exists() {
52        fs::create_dir_all(target.parent().ok_or("Invalid cache path")?)?;
53        clone_into_cache(&spec.owner, &spec.name, &reference, &target)?;
54    }
55
56    Ok(CachedGithubRepo {
57        owner: spec.owner,
58        name: spec.name,
59        reference,
60        commit,
61        root: target,
62    })
63}
64
/// Splits a GitHub file URL into a repository spec and a repository-relative file path.
///
/// Two URL shapes are recognised:
///
/// * `https://raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>`, where `<ref>` may
///   also be written fully qualified as `refs/heads/<branch>` or `refs/tags/<tag>`;
/// * `https://github.com/<owner>/<repo>/blob/<ref>/<path>`.
///
/// A trailing `?query` or `#fragment` is ignored.
///
/// Returns `Some(("<owner>/<repo>@<ref>", "<path>"))`. Returns `None` when the URL has
/// neither shape or when the owner, repository, ref or path is empty.
///
/// The ref is taken to be a single path segment: a branch name that itself contains `/`
/// cannot be told apart from leading directories of the path.
pub fn parse_github_file_url(url: &str) -> Option<(String, String)> {
    // `?raw=true`, `#L10` and similar suffixes are not part of the file path.
    let url = url
        .find(|c: char| matches!(c, '?' | '#'))
        .map_or(url, |end| &url[..end]);

    let (owner, repo, reference, path) =
        if let Some(rest) = url.strip_prefix("https://raw.githubusercontent.com/") {
            let mut parts = rest.splitn(3, '/');
            let owner = parts.next()?;
            let repo = parts.next()?;
            let tail = parts.next()?;
            // Raw URLs may spell the ref out in full; keep only the short name so the
            // result has the same `owner/repo@ref` form as for the short spelling.
            let tail = tail
                .strip_prefix("refs/heads/")
                .or_else(|| tail.strip_prefix("refs/tags/"))
                .unwrap_or(tail);
            let (reference, path) = tail.split_once('/')?;
            (owner, repo, reference, path)
        } else if let Some(rest) = url.strip_prefix("https://github.com/") {
            let mut parts = rest.splitn(5, '/');
            let owner = parts.next()?;
            let repo = parts.next()?;
            if parts.next()? != "blob" {
                return None;
            }
            let reference = parts.next()?;
            let path = parts.next()?;
            (owner, repo, reference, path)
        } else {
            return None;
        };

    // `https://github.com/a/b/blob/main/` and friends name no file at all.
    if [owner, repo, reference, path]
        .iter()
        .any(|part| part.is_empty())
    {
        return None;
    }

    Some((format!("{owner}/{repo}@{reference}"), path.to_string()))
}
88
89fn parse_github_repo_spec(input: &str) -> Result<GithubRepoSpec, Box<dyn Error>> {
90    let trimmed = input.trim().strip_prefix("git+").unwrap_or(input.trim());
91
92    let raw = if let Some(rest) = trimmed.strip_prefix("https://github.com/") {
93        rest.trim_end_matches('/')
94    } else if let Some(rest) = trimmed.strip_prefix("http://github.com/") {
95        rest.trim_end_matches('/')
96    } else {
97        trimmed
98    };
99
100    let (repo_part, reference) = if let Some((repo_part, reference)) = raw.rsplit_once('@') {
101        (
102            repo_part.trim_end_matches(".git"),
103            Some(reference.to_string()),
104        )
105    } else {
106        (raw.trim_end_matches(".git"), None)
107    };
108
109    let mut parts = repo_part.split('/');
110    let owner = parts.next().ok_or("Missing GitHub owner")?.to_string();
111    let name = parts.next().ok_or("Missing GitHub repo name")?.to_string();
112
113    if owner.is_empty() || name.is_empty() || parts.next().is_some() {
114        return Err("Expected GitHub repo in the form owner/repo[@ref]".into());
115    }
116
117    Ok(GithubRepoSpec {
118        owner,
119        name,
120        reference,
121    })
122}
123
124fn github_client() -> Result<Client, Box<dyn Error>> {
125    Ok(Client::builder().build()?)
126}
127
128fn fetch_default_branch(
129    client: &Client,
130    owner: &str,
131    repo: &str,
132) -> Result<String, Box<dyn Error>> {
133    let url = format!("https://api.github.com/repos/{owner}/{repo}");
134    let response = client
135        .get(&url)
136        .header(USER_AGENT, "mdmodels")
137        .header(ACCEPT, "application/vnd.github+json")
138        .send()?;
139
140    if !response.status().is_success() {
141        return Err(github_http_error(
142            response.status(),
143            owner,
144            repo,
145            None,
146            "lookup repository metadata",
147        )
148        .into());
149    }
150
151    let payload: GithubRepoResponse = response.json()?;
152    Ok(payload.default_branch)
153}
154
155fn fetch_commit_sha(
156    client: &Client,
157    owner: &str,
158    repo: &str,
159    reference: &str,
160) -> Result<String, Box<dyn Error>> {
161    let mut url = Url::parse("https://api.github.com")?;
162    url.path_segments_mut()
163        .map_err(|_| "Failed to build GitHub commit URL")?
164        .extend(["repos", owner, repo, "commits", reference]);
165
166    let response = client
167        .get(url)
168        .header(USER_AGENT, "mdmodels")
169        .header(ACCEPT, "application/vnd.github+json")
170        .send()?;
171
172    if !response.status().is_success() {
173        return Err(github_http_error(
174            response.status(),
175            owner,
176            repo,
177            Some(reference),
178            "resolve commit for ref",
179        )
180        .into());
181    }
182
183    let payload: GithubCommitResponse = response.json()?;
184    Ok(payload.sha)
185}
186
187fn github_http_error(
188    status: StatusCode,
189    owner: &str,
190    repo: &str,
191    reference: Option<&str>,
192    action: &str,
193) -> String {
194    let target = match reference {
195        Some(reference) => format!("{owner}/{repo}@{reference}"),
196        None => format!("{owner}/{repo}"),
197    };
198
199    match status {
200        StatusCode::NOT_FOUND => match reference {
201            Some(reference) => format!(
202                "GitHub {action} failed: '{target}' was not found. Check the repository name and ref '{reference}'."
203            ),
204            None => format!(
205                "GitHub {action} failed: repository '{target}' was not found. Check owner/repo spelling."
206            ),
207        },
208        StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => format!(
209            "GitHub {action} failed for '{target}' (HTTP {status}). Repository may be private or rate-limited."
210        ),
211        _ => format!("GitHub {action} failed for '{target}' (HTTP {status})."),
212    }
213}
214
215fn clone_into_cache(
216    owner: &str,
217    repo: &str,
218    reference: &str,
219    target: &Path,
220) -> Result<(), Box<dyn Error>> {
221    let remote_url = format!("https://github.com/{owner}/{repo}.git");
222
223    let temp_dir = target
224        .parent()
225        .ok_or("Invalid target path")?
226        .join(format!(".tmp-{}", cache_suffix()));
227
228    if temp_dir.exists() {
229        fs::remove_dir_all(&temp_dir)?;
230    }
231
232    let mut prepare = gix::prepare_clone(remote_url, &temp_dir)?;
233    prepare = prepare.with_ref_name(Some(reference))?;
234
235    let (mut checkout, _fetch_outcome) =
236        prepare.fetch_then_checkout(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)?;
237    let (_repo, _checkout_outcome) =
238        checkout.main_worktree(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)?;
239
240    if target.exists() {
241        fs::remove_dir_all(&temp_dir)?;
242        return Ok(());
243    }
244
245    fs::rename(&temp_dir, target)?;
246    Ok(())
247}
248
249fn github_cache_root() -> PathBuf {
250    cache_base_dir().join("mdmodels").join("github")
251}
252
253fn cache_base_dir() -> PathBuf {
254    dirs::cache_dir().unwrap_or_else(std::env::temp_dir)
255}
256
/// Builds the suffix for temporary clone directories: `<nanoseconds since the Unix
/// epoch>-<process id>`.
///
/// The nanosecond part is `0` when the system clock reads earlier than the Unix epoch.
fn cache_suffix() -> String {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();
    format!("{nanos}-{pid}")
}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `owner/repo` spec every parsing test expects, with the given ref.
    fn expected_spec(reference: Option<&str>) -> GithubRepoSpec {
        GithubRepoSpec {
            owner: "owner".to_string(),
            name: "repo".to_string(),
            reference: reference.map(str::to_string),
        }
    }

    /// Builds the `(spec, path)` pair every file-URL test expects.
    fn expected_file() -> (String, String) {
        ("a/b@main".to_string(), "models/root.md".to_string())
    }

    #[test]
    fn parse_repo_without_ref() {
        let parsed = parse_github_repo_spec("owner/repo").expect("spec should parse");
        assert_eq!(parsed, expected_spec(None));
    }

    #[test]
    fn parse_repo_with_ref() {
        let parsed = parse_github_repo_spec("owner/repo@main").expect("spec should parse");
        assert_eq!(parsed, expected_spec(Some("main")));
    }

    #[test]
    fn parse_repo_with_git_plus_url() {
        let parsed = parse_github_repo_spec("git+https://github.com/owner/repo.git@v1.2.3")
            .expect("spec should parse");
        assert_eq!(parsed, expected_spec(Some("v1.2.3")));
    }

    #[test]
    fn parse_blob_url() {
        let parsed = parse_github_file_url("https://github.com/a/b/blob/main/models/root.md")
            .expect("url should parse");
        assert_eq!(parsed, expected_file());
    }

    #[test]
    fn parse_raw_url() {
        let parsed =
            parse_github_file_url("https://raw.githubusercontent.com/a/b/main/models/root.md")
                .expect("url should parse");
        assert_eq!(parsed, expected_file());
    }
}