use anyhow::{Context, Result};
use flate2::read::GzDecoder;
use reqwest::blocking::{Client, RequestBuilder};
use serde::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::io::Cursor;
use std::path::Path;
use tar::Archive;
use super::models::{GitHubUrl, SkillEntry, TapRegistry};
use crate::skill::SkillMetadata;
/// User-agent string that `build_client` configures on the HTTP client.
const USER_AGENT: &str = "skillshub";
/// Creates the blocking HTTP client used for the requests in this module.
///
/// The client is configured with [`USER_AGENT`] as its user agent.
///
/// # Errors
/// Returns an error when the underlying client cannot be constructed.
fn build_client() -> Result<Client> {
    let builder = Client::builder().user_agent(USER_AGENT);
    let client = builder.build().context("Failed to build HTTP client")?;
    Ok(client)
}
/// Attaches bearer authentication to `request` when a GitHub token is available.
///
/// The token is read from the `GITHUB_TOKEN` environment variable. If the
/// variable is unset, not valid Unicode, or blank, the request is returned
/// unchanged so that it is sent anonymously.
fn with_auth(request: RequestBuilder) -> RequestBuilder {
    match std::env::var("GITHUB_TOKEN") {
        // A blank token (e.g. `GITHUB_TOKEN=""` exported by CI) would be sent as an
        // empty bearer credential; treat it the same as "no token configured".
        Ok(token) if !token.trim().is_empty() => request.bearer_auth(token),
        _ => request,
    }
}
/// JSON body of the repository tree request made by `discover_skills_from_repo`.
///
/// Only the `tree` array is deserialized.
#[derive(Debug, Deserialize)]
struct TreeResponse {
/// Entries listed in the response's `tree` array.
tree: Vec<TreeEntry>,
}
/// A single element of the `tree` array in a [`TreeResponse`].
#[derive(Debug, Deserialize)]
struct TreeEntry {
/// Path of the entry; `extract_skill_paths` checks whether it names a `SKILL.md` file.
path: String,
/// Value of the JSON `type` field (renamed because `type` is a Rust keyword).
/// `extract_skill_paths` only considers entries whose value is `"blob"`.
#[serde(rename = "type")]
entry_type: String,
}
/// JSON body of the repository info request made by `get_default_branch`.
///
/// Only the `default_branch` field is deserialized.
#[derive(Debug, Deserialize)]
struct RepoInfo {
/// Name of the repository's default branch as reported by the API.
default_branch: String,
}
/// Looks up the default branch of `owner/repo` through the GitHub API.
///
/// The API base URL defaults to `https://api.github.com` and can be overridden
/// with the `SKILLSHUB_GITHUB_API_BASE` environment variable. Authentication is
/// applied by `with_auth`.
///
/// # Errors
/// Returns an error if the HTTP client cannot be built, the request fails, the
/// server answers with a non-success status (with a dedicated message for
/// `404 Not Found`), or the response body cannot be parsed.
pub fn get_default_branch(owner: &str, repo: &str) -> Result<String> {
    let client = build_client()?;

    let api_base = match std::env::var("SKILLSHUB_GITHUB_API_BASE") {
        Ok(base) => base,
        Err(_) => String::from("https://api.github.com"),
    };
    let url = format!("{}/repos/{}/{}", api_base, owner, repo);

    let response = with_auth(client.get(&url))
        .send()
        .with_context(|| format!("Failed to fetch repo info from {}", url))?;

    let status = response.status();
    // 404 gets a more helpful message than the generic failure below.
    if status == reqwest::StatusCode::NOT_FOUND {
        anyhow::bail!(
            "Repository not found on GitHub: {}/{}\n\
             Please check that:\n\
             - The repository exists and is spelled correctly\n\
             - The repository is public (or GITHUB_TOKEN is set for private repos)",
            owner,
            repo
        );
    }
    if !status.is_success() {
        anyhow::bail!("Failed to fetch repo info: HTTP {}", status);
    }

    let info = response
        .json::<RepoInfo>()
        .with_context(|| "Failed to parse repository info response")?;
    Ok(info.default_branch)
}
/// Parses a GitHub URL or a bare `owner/repo` identifier into a [`GitHubUrl`].
///
/// Accepted inputs (trailing slashes are ignored):
/// - `owner/repo`
/// - `github.com/owner/repo`, `http://github.com/owner/repo`, `https://github.com/owner/repo`
/// - any of the URL forms followed by `/tree/<branch>` and, optionally, `/<sub/path>`
///
/// `branch` and `path` of the result are `None` unless the `/tree/...` form is used.
///
/// # Errors
/// Returns an error when the input is neither a recognised GitHub URL nor a
/// valid `owner/repo` identifier, or when the owner or repository segment is
/// missing or empty.
pub fn parse_github_url(url: &str) -> Result<GitHubUrl> {
    let url = url.trim_end_matches('/');

    // Try the known URL prefixes in order; the first one that matches wins.
    let stripped = ["https://github.com/", "http://github.com/", "github.com/"]
        .iter()
        .find_map(|prefix| url.strip_prefix(*prefix));

    let path = match stripped {
        Some(rest) => rest,
        // No URL prefix: the input may still be a bare `owner/repo` identifier.
        None if is_valid_repo_id(url) => url,
        None => anyhow::bail!(
            "Invalid GitHub URL or repository ID: {}\n\
             Expected formats:\n\
             - owner/repo\n\
             - https://github.com/owner/repo",
            url
        ),
    };

    let parts: Vec<&str> = path.split('/').collect();
    // Inputs such as `https://github.com//repo` split into an empty owner segment;
    // reject those instead of producing a `GitHubUrl` with empty fields.
    if parts.len() < 2 || parts[0].is_empty() || parts[1].is_empty() {
        anyhow::bail!("Invalid repository ID: must be in 'owner/repo' format");
    }
    let owner = parts[0].to_string();
    let repo = parts[1].to_string();

    // `owner/repo/tree/<branch>[/<sub/path>]`
    let (branch, subpath) = match parts.as_slice() {
        [_, _, "tree", branch, rest @ ..] => {
            let subpath = if rest.is_empty() { None } else { Some(rest.join("/")) };
            (Some((*branch).to_string()), subpath)
        }
        _ => (None, None),
    };

    Ok(GitHubUrl {
        owner,
        repo,
        branch,
        path: subpath,
    })
}
/// Returns `true` when `s` has the shape of an `owner/repo` identifier.
///
/// Both parts must be non-empty, consist only of ASCII letters, ASCII digits,
/// `-`, `_` or `.`, and must not start with `-` or `.`. Exactly one `/` is
/// allowed, separating the two parts.
fn is_valid_repo_id(s: &str) -> bool {
    fn is_valid_part(part: &str) -> bool {
        !part.is_empty()
            && !part.starts_with('-')
            && !part.starts_with('.')
            // ASCII only: `char::is_alphanumeric` would also accept letters and
            // digits from other scripts, which cannot occur in GitHub names.
            && part
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
    }

    match s.split_once('/') {
        // A second `/` ends up in `repo` and is rejected by the character check,
        // so this still requires exactly two parts.
        Some((owner, repo)) => is_valid_part(owner) && is_valid_part(repo),
        None => false,
    }
}
/// Builds a [`TapRegistry`] named `tap_name` from the `SKILL.md` files found in a repository.
///
/// Steps:
/// 1. Resolve the branch: `github_url.branch` if set, otherwise the repository's
///    default branch via `get_default_branch`.
/// 2. Fetch the recursive tree listing for that branch and collect every
///    directory containing a `SKILL.md` (see `extract_skill_paths`).
/// 3. For each such directory, fetch its `SKILL.md`:
///    - on a successful fetch, the skill is registered under the name from its
///      front matter; if the body cannot be read or parsed, the skill is skipped;
///    - if the fetch fails, the skill is registered under the last path segment
///      of its directory (or the repository name for a root-level skill) with
///      no description.
///
/// # Errors
/// Returns an error if the client cannot be built, the branch cannot be
/// resolved, the tree request fails or returns a non-success status, the tree
/// response cannot be parsed, or no `SKILL.md` is found.
pub fn discover_skills_from_repo(github_url: &GitHubUrl, tap_name: &str) -> Result<TapRegistry> {
    let client = build_client()?;

    let branch = if let Some(explicit) = &github_url.branch {
        explicit.clone()
    } else {
        get_default_branch(&github_url.owner, &github_url.repo)?
    };

    let tree_url = format!("{}/git/trees/{}?recursive=1", github_url.api_url(), branch);
    let response = with_auth(client.get(&tree_url))
        .send()
        .with_context(|| format!("Failed to fetch repo tree from {}", tree_url))?;

    let status = response.status();
    if status == reqwest::StatusCode::NOT_FOUND {
        anyhow::bail!(
            "Branch '{}' not found in repository {}/{}\n\
             Please check that the branch exists.",
            branch,
            github_url.owner,
            github_url.repo
        );
    }
    if !status.is_success() {
        anyhow::bail!("Failed to fetch repo tree: HTTP {} from {}", status, tree_url);
    }

    let listing: TreeResponse = response.json().with_context(|| "Failed to parse tree response")?;
    let skill_dirs = extract_skill_paths(&listing.tree);
    if skill_dirs.is_empty() {
        anyhow::bail!("No skills found in repository (no SKILL.md files detected)");
    }

    let mut skills = HashMap::new();
    for dir in &skill_dirs {
        // An empty directory string denotes a SKILL.md at the repository root.
        let manifest_path = if dir.is_empty() {
            String::from("SKILL.md")
        } else {
            format!("{}/SKILL.md", dir)
        };
        let manifest_url = github_url.raw_url(&manifest_path, &branch);

        // `None` covers both a transport error and a non-success status.
        let fetched = with_auth(client.get(&manifest_url))
            .send()
            .ok()
            .filter(|resp| resp.status().is_success());

        let (name, description) = match fetched {
            Some(resp) => {
                let parsed = resp.text().ok().and_then(|body| parse_skill_md_content(&body));
                match parsed {
                    Some(name_and_description) => name_and_description,
                    // Fetched but unreadable or without valid front matter: skip it.
                    None => continue,
                }
            }
            None => {
                // Could not fetch the manifest: derive a name from the location.
                let fallback_name = if dir.is_empty() {
                    github_url.repo.clone()
                } else {
                    dir.rsplit('/').next().unwrap_or(dir.as_str()).to_string()
                };
                (fallback_name, None)
            }
        };

        skills.insert(
            name,
            SkillEntry {
                path: dir.clone(),
                description,
                homepage: None,
            },
        );
    }

    Ok(TapRegistry {
        name: tap_name.to_string(),
        description: Some(format!("Skills from {}/{}", github_url.owner, github_url.repo)),
        skills,
    })
}
/// Extracts the skill name and optional description from the contents of a `SKILL.md`.
///
/// The text is split on the first two `---` delimiters; whatever lies between
/// them is trimmed and parsed as YAML into [`SkillMetadata`].
///
/// Returns `None` when fewer than two delimiters are present or the YAML
/// cannot be deserialized.
fn parse_skill_md_content(content: &str) -> Option<(String, Option<String>)> {
    let mut sections = content.splitn(3, "---");
    // Text before the opening delimiter is ignored.
    sections.next()?;
    let front_matter = sections.next()?;
    // A third section only exists if the closing delimiter was found.
    sections.next()?;

    let metadata: SkillMetadata = serde_yaml::from_str(front_matter.trim()).ok()?;
    Some((metadata.name, metadata.description))
}
/// Returns the abbreviated (7-character) SHA of the newest commit on `resolved_branch`.
///
/// When `path` is given, it is added to the request as the `path` query
/// parameter so that only commits for that path are requested.
///
/// NOTE(review): `resolved_branch` and `path` are appended to the query string
/// verbatim; values containing characters such as `&` or `#` would need
/// percent-encoding — confirm whether callers can pass such values.
///
/// # Errors
/// Returns an error if the client cannot be built, the request fails or
/// returns a non-success status, the response is not valid JSON, or the
/// response contains no commit with a string `sha` field.
pub fn get_latest_commit(github_url: &GitHubUrl, path: Option<&str>, resolved_branch: &str) -> Result<String> {
    let client = build_client()?;

    let mut url = format!("{}/commits?sha={}&per_page=1", github_url.api_url(), resolved_branch);
    if let Some(p) = path {
        url.push_str("&path=");
        url.push_str(p);
    }

    let response = with_auth(client.get(&url))
        .send()
        .with_context(|| format!("Failed to fetch commits from {}", url))?;
    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("Failed to fetch commits: HTTP {}", status);
    }

    let commits: Vec<serde_json::Value> = response
        .json()
        .with_context(|| "Failed to parse commits response")?;
    let sha = commits
        .first()
        .and_then(|commit| commit["sha"].as_str())
        .with_context(|| "No commits found")?;

    // `get(..7)` instead of `[..7]`: slicing would panic if the server ever
    // returned a value shorter than seven bytes; fall back to the full value.
    Ok(sha.get(..7).unwrap_or(sha).to_string())
}
/// Downloads one skill directory from a repository tarball into `dest`.
///
/// The tarball is fetched for `commit` when given, otherwise for the branch
/// from `github_url` (falling back to the repository's default branch). It is
/// unpacked into a temporary directory, and the contents of `skill_path`
/// (the repository root when empty) are copied into `dest`, which is created
/// if necessary.
///
/// Returns the commit identifier to record for the installed skill: `commit`
/// when given, otherwise the result of `get_latest_commit` for `skill_path`;
/// if that lookup fails a warning is printed and the git ref is returned.
///
/// # Errors
/// Returns an error if `skill_path` is absolute or contains `..`, if the
/// download or extraction fails, if `skill_path` does not exist in the
/// archive or lacks a `SKILL.md`, or if copying to `dest` fails.
pub fn download_skill(github_url: &GitHubUrl, skill_path: &str, dest: &Path, commit: Option<&str>) -> Result<String> {
    // `Path::join` replaces the base when given an absolute path and follows `..`
    // upwards, so an unchecked `skill_path` could point outside the unpacked
    // repository. Only plain relative components are accepted.
    let escapes_repo = Path::new(skill_path).components().any(|component| {
        !matches!(
            component,
            std::path::Component::Normal(_) | std::path::Component::CurDir
        )
    });
    if escapes_repo {
        anyhow::bail!(
            "Invalid skill path '{}': must be a relative path inside the repository",
            skill_path
        );
    }

    let resolved_branch = match &github_url.branch {
        Some(b) => b.clone(),
        None => get_default_branch(&github_url.owner, &github_url.repo)?,
    };
    let git_ref = commit.unwrap_or(&resolved_branch);

    let client = build_client()?;
    let tarball_url = github_url.tarball_url(git_ref);
    let response = with_auth(client.get(&tarball_url))
        .send()
        .with_context(|| format!("Failed to download from {}", tarball_url))?;
    if !response.status().is_success() {
        anyhow::bail!(
            "Failed to download tarball: HTTP {} from {}",
            response.status(),
            tarball_url
        );
    }
    let bytes = response.bytes()?;

    // An explicitly requested commit is returned as-is; otherwise resolving the
    // latest commit is best-effort and falls back to the ref that was downloaded.
    let commit_sha = match commit {
        Some(pinned) => pinned.to_string(),
        None => get_latest_commit(github_url, Some(skill_path), &resolved_branch).unwrap_or_else(|err| {
            println!(
                "Warning: failed to resolve latest commit for {} ({}), using {}",
                github_url.repo, err, git_ref
            );
            git_ref.to_string()
        }),
    };

    let mut archive = Archive::new(GzDecoder::new(Cursor::new(bytes)));
    let temp_dir = tempfile::tempdir()?;
    archive.unpack(temp_dir.path())?;

    // The first directory found in the unpack location is used as the repository root.
    let extracted_dir = fs::read_dir(temp_dir.path())?
        .filter_map(|e| e.ok())
        .find(|e| e.path().is_dir())
        .with_context(|| "Failed to find extracted directory")?
        .path();

    let skill_source = if skill_path.is_empty() {
        extracted_dir
    } else {
        extracted_dir.join(skill_path)
    };
    if !skill_source.exists() {
        anyhow::bail!("Skill path '{}' not found in repository", skill_path);
    }
    if !skill_source.join("SKILL.md").exists() {
        anyhow::bail!(
            "Invalid skill: no SKILL.md found in '{}'",
            if skill_path.is_empty() { "(root)" } else { skill_path }
        );
    }

    fs::create_dir_all(dest)?;
    copy_dir_contents(&skill_source, dest)?;
    Ok(commit_sha)
}
/// Returns the directory of every `SKILL.md` file listed in `tree`, in listing order.
///
/// Only entries whose type is `"blob"` are considered. A `SKILL.md` at the
/// repository root is reported as the empty string; any other match is
/// reported as the path with the trailing `/SKILL.md` removed.
fn extract_skill_paths(tree: &[TreeEntry]) -> Vec<String> {
    let mut skill_dirs = Vec::new();
    for node in tree {
        if node.entry_type != "blob" {
            continue;
        }
        if node.path == "SKILL.md" {
            skill_dirs.push(String::new());
        } else if let Some(dir) = node.path.strip_suffix("/SKILL.md") {
            skill_dirs.push(dir.to_string());
        }
    }
    skill_dirs
}
/// Recursively copies everything inside `src` into `dst`.
///
/// Sub-directories are created in `dst` as needed and descended into; every
/// other entry is copied with `fs::copy`. `dst` itself is not created here.
///
/// # Errors
/// Returns the first I/O error encountered while reading, creating or copying.
fn copy_dir_contents(src: &Path, dst: &Path) -> Result<()> {
    for item in fs::read_dir(src)? {
        let item = item?;
        let source = item.path();
        let target = dst.join(item.file_name());

        if !source.is_dir() {
            fs::copy(&source, &target)?;
            continue;
        }

        fs::create_dir_all(&target)?;
        copy_dir_contents(&source, &target)?;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and panics with a descriptive message if parsing fails.
    fn parsed(input: &str) -> GitHubUrl {
        parse_github_url(input).unwrap_or_else(|err| panic!("failed to parse {:?}: {}", input, err))
    }

    /// Builds a tree listing entry for the `extract_skill_paths` tests.
    fn tree_entry(path: &str, entry_type: &str) -> TreeEntry {
        TreeEntry {
            path: String::from(path),
            entry_type: String::from(entry_type),
        }
    }

    // ---- parse_skill_md_content -------------------------------------------

    #[test]
    fn test_parse_skill_md_content() {
        let content = "---\nname: test-skill\ndescription: A test skill\n---\n# Test Skill\nSome content here.\n";
        let (name, desc) = parse_skill_md_content(content).expect("front matter should parse");
        assert_eq!(name, "test-skill");
        assert_eq!(desc.as_deref(), Some("A test skill"));
    }

    #[test]
    fn test_parse_skill_md_content_no_description() {
        let content = "---\nname: minimal-skill\n---\n# Minimal\n";
        let (name, desc) = parse_skill_md_content(content).expect("front matter should parse");
        assert_eq!(name, "minimal-skill");
        assert_eq!(desc, None);
    }

    #[test]
    fn test_parse_skill_md_content_invalid() {
        assert!(parse_skill_md_content("# No frontmatter here").is_none());
    }

    // ---- parse_github_url: full URLs ----------------------------------------

    #[test]
    fn test_parse_github_url_simple() {
        let url = parsed("https://github.com/owner/repo");
        assert_eq!(url.owner, "owner");
        assert_eq!(url.repo, "repo");
        assert_eq!(url.branch, None);
        assert_eq!(url.path, None);
    }

    #[test]
    fn test_parse_github_url_with_branch() {
        let url = parsed("https://github.com/owner/repo/tree/develop");
        assert_eq!(url.owner, "owner");
        assert_eq!(url.repo, "repo");
        assert_eq!(url.branch.as_deref(), Some("develop"));
        assert_eq!(url.path, None);
    }

    #[test]
    fn test_parse_github_url_with_path() {
        let url = parsed("https://github.com/owner/repo/tree/main/path/to/folder");
        assert_eq!(url.owner, "owner");
        assert_eq!(url.repo, "repo");
        assert_eq!(url.branch.as_deref(), Some("main"));
        assert_eq!(url.path.as_deref(), Some("path/to/folder"));
    }

    #[test]
    fn test_parse_github_url_with_master_branch() {
        let url = parsed("https://github.com/owner/repo/tree/master");
        assert_eq!(url.owner, "owner");
        assert_eq!(url.repo, "repo");
        assert_eq!(url.branch.as_deref(), Some("master"));
        assert_eq!(url.path, None);
    }

    #[test]
    fn test_parse_github_url_no_protocol() {
        let url = parsed("github.com/owner/repo");
        assert_eq!(url.owner, "owner");
        assert_eq!(url.repo, "repo");
        assert_eq!(url.branch, None);
    }

    #[test]
    fn test_parse_github_url_trailing_slash() {
        let url = parsed("https://github.com/owner/repo/");
        assert_eq!(url.owner, "owner");
        assert_eq!(url.repo, "repo");
        assert_eq!(url.branch, None);
    }

    #[test]
    fn test_parse_github_url_invalid() {
        let rejected = [
            "https://gitlab.com/owner/repo",
            "https://github.com/owner",
            "not-a-url",
        ];
        for input in rejected.iter() {
            assert!(parse_github_url(input).is_err());
        }
    }

    // ---- parse_github_url: bare repository IDs ------------------------------

    #[test]
    fn test_parse_github_url_repo_id_simple() {
        let url = parsed("owner/repo");
        assert_eq!(url.owner, "owner");
        assert_eq!(url.repo, "repo");
        assert_eq!(url.branch, None);
        assert_eq!(url.path, None);
    }

    #[test]
    fn test_parse_github_url_repo_id_with_hyphens() {
        let url = parsed("my-org/my-repo");
        assert_eq!(url.owner, "my-org");
        assert_eq!(url.repo, "my-repo");
        assert_eq!(url.branch, None);
    }

    #[test]
    fn test_parse_github_url_repo_id_with_underscores() {
        let url = parsed("user_name/repo_name");
        assert_eq!(url.owner, "user_name");
        assert_eq!(url.repo, "repo_name");
        assert_eq!(url.branch, None);
    }

    #[test]
    fn test_parse_github_url_repo_id_with_dots() {
        let url = parsed("owner/repo.js");
        assert_eq!(url.owner, "owner");
        assert_eq!(url.repo, "repo.js");
        assert_eq!(url.branch, None);
    }

    // ---- is_valid_repo_id -----------------------------------------------------

    #[test]
    fn test_is_valid_repo_id() {
        let accepted = ["owner/repo", "my-org/my-repo", "user123/repo_name", "Owner/Repo.js"];
        for id in accepted.iter() {
            assert!(is_valid_repo_id(id));
        }
    }

    #[test]
    fn test_is_valid_repo_id_invalid() {
        let rejected = [
            "just-one-part",
            "owner/repo/extra",
            "/repo",
            "owner/",
            "-owner/repo",
            ".owner/repo",
            "owner/repo name",
        ];
        for id in rejected.iter() {
            assert!(!is_valid_repo_id(id));
        }
    }

    // ---- extract_skill_paths --------------------------------------------------

    #[test]
    fn test_extract_skill_paths_subdirectory() {
        let tree = vec![
            tree_entry("skills/code-reviewer/SKILL.md", "blob"),
            tree_entry("skills/test-skill/SKILL.md", "blob"),
            tree_entry("README.md", "blob"),
        ];
        assert_eq!(
            extract_skill_paths(&tree),
            vec!["skills/code-reviewer", "skills/test-skill"]
        );
    }

    #[test]
    fn test_extract_skill_paths_root_level() {
        let tree = vec![tree_entry("SKILL.md", "blob"), tree_entry("README.md", "blob")];
        assert_eq!(extract_skill_paths(&tree), vec![""]);
    }

    #[test]
    fn test_extract_skill_paths_root_and_subdirectory() {
        let tree = vec![
            tree_entry("SKILL.md", "blob"),
            tree_entry("skills/other-skill/SKILL.md", "blob"),
            tree_entry("README.md", "blob"),
        ];
        assert_eq!(extract_skill_paths(&tree), vec!["", "skills/other-skill"]);
    }

    #[test]
    fn test_extract_skill_paths_no_skills() {
        let tree = vec![tree_entry("README.md", "blob"), tree_entry("src/main.rs", "blob")];
        assert!(extract_skill_paths(&tree).is_empty());
    }

    #[test]
    fn test_extract_skill_paths_ignores_trees() {
        let tree = vec![
            tree_entry("SKILL.md", "tree"),
            tree_entry("skills/test/SKILL.md", "blob"),
        ];
        assert_eq!(extract_skill_paths(&tree), vec!["skills/test"]);
    }

    #[test]
    fn test_extract_skill_paths_deep_nesting() {
        let tree = vec![tree_entry("a/b/c/SKILL.md", "blob")];
        assert_eq!(extract_skill_paths(&tree), vec!["a/b/c"]);
    }
}