use std::collections::HashSet;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use serde::Deserialize;
use tracing::{error, info, warn};
use super::{GitHubConfig, GitHubSource, OwnerKind};
/// One repository entry, deserialized from the JSON repo listing fetched by
/// `GitHubClient::list_repos`.
#[derive(Debug, Deserialize)]
struct GitHubRepo {
/// Repository name; matched against exclude patterns and joined onto the
/// clone base directory to form the local checkout path.
name: String,
/// URL handed to `clone_repo` when the repository has to be cloned.
clone_url: String,
/// Archived flag; consulted by `filter_repos` when `skip_archived` is set.
archived: bool,
/// Fork flag; consulted by `filter_repos` when `skip_forks` is set.
fork: bool,
}
/// Minimal authenticated client for the GitHub REST API.
struct GitHubClient {
    /// HTTP client built with the `codesearch-daemon` user agent.
    client: reqwest::Client,
    /// Token sent as a `Bearer` credential on every request.
    token: String,
}

impl GitHubClient {
    /// Page size requested from the API; a shorter page marks the end of the listing.
    const PER_PAGE: usize = 100;

    /// Builds a client that authenticates with `token`.
    ///
    /// # Errors
    /// Returns an error if the underlying HTTP client cannot be constructed.
    fn new(token: String) -> Result<Self> {
        let client = reqwest::Client::builder()
            .user_agent("codesearch-daemon")
            .build()
            .context("Failed to build HTTP client")?;
        Ok(Self { client, token })
    }

    /// Lists every repository of the org or user described by `source`,
    /// following pagination until a short page is returned.
    ///
    /// If the rate limit runs out mid-listing, pagination stops with a warning
    /// and the repositories collected so far are returned. The page that
    /// consumed the last allowed request is still included, because that
    /// response itself was served successfully.
    ///
    /// # Errors
    /// Returns an error when a request fails, when the API answers with a
    /// non-success status that is not explained by an exhausted rate limit, or
    /// when a response body cannot be parsed.
    async fn list_repos(&self, source: &GitHubSource) -> Result<Vec<GitHubRepo>> {
        let base_url = match source.kind {
            OwnerKind::Org => format!("https://api.github.com/orgs/{}/repos", source.owner),
            OwnerKind::User => format!("https://api.github.com/users/{}/repos", source.owner),
        };
        let per_page = Self::PER_PAGE.to_string();
        let mut all_repos = Vec::new();
        let mut page = 1u32;
        loop {
            let page_param = page.to_string();
            let resp = self
                .client
                .get(&base_url)
                .query(&[
                    ("per_page", per_page.as_str()),
                    ("page", page_param.as_str()),
                ])
                .header("Authorization", format!("Bearer {}", self.token))
                .header("X-GitHub-Api-Version", "2022-11-28")
                .header("Accept", "application/vnd.github+json")
                .send()
                .await
                .with_context(|| format!("GitHub API request failed (page {})", page))?;

            // Read the quota before the body is consumed. `remaining == 0`
            // describes the state *after* this request, so a successful
            // response carrying it is still valid data.
            let rate_limit_exhausted = resp
                .headers()
                .get("x-ratelimit-remaining")
                .and_then(|v| v.to_str().ok())
                .and_then(|v| v.parse::<u32>().ok())
                == Some(0);

            let status = resp.status();
            if !status.is_success() {
                if rate_limit_exhausted {
                    // Best effort: keep what was already fetched instead of failing.
                    warn!("GitHub API rate limit exhausted, stopping pagination");
                    break;
                }
                let body = resp.text().await.unwrap_or_default();
                return Err(anyhow::anyhow!(
                    "GitHub API returned {}: {}",
                    status,
                    body
                ));
            }

            let repos: Vec<GitHubRepo> = resp
                .json()
                .await
                .context("Failed to parse GitHub repo list")?;
            let count = repos.len();
            all_repos.extend(repos);
            if count < Self::PER_PAGE {
                break;
            }
            if rate_limit_exhausted {
                // More pages exist, but the next request would be rejected.
                warn!("GitHub API rate limit exhausted, stopping pagination");
                break;
            }
            page += 1;
        }
        Ok(all_repos)
    }
}
/// Resolves the GitHub token to use for API calls and clones.
///
/// The configured `token_file` (with `~` expanded) is tried first; if it is
/// unset, unreadable, or empty, the `GITHUB_TOKEN` environment variable is
/// used instead. Surrounding whitespace is stripped from either source, so a
/// trailing newline or a whitespace-only value never ends up in a request
/// header.
///
/// Returns `None` when neither source yields a non-empty token.
fn resolve_token(config: &GitHubConfig) -> Option<String> {
    if let Some(path) = &config.token_file {
        let expanded = shellexpand::tilde(path);
        match std::fs::read_to_string(expanded.as_ref()) {
            Ok(contents) => {
                let token = contents.trim();
                if !token.is_empty() {
                    return Some(token.to_string());
                }
                warn!("Token file {} is empty", path);
            }
            Err(e) => {
                warn!("Failed to read token file {}: {}", path, e);
            }
        }
    }
    // Fall back to the environment, applying the same trimming as for the file.
    std::env::var("GITHUB_TOKEN")
        .ok()
        .map(|token| token.trim().to_string())
        .filter(|token| !token.is_empty())
}
/// Tests `name` against a glob-like `pattern` in which `*` matches any run of
/// characters (including none). A pattern without `*` must equal `name`
/// exactly. Matching is case-sensitive.
fn matches_pattern(name: &str, pattern: &str) -> bool {
    // No wildcard at all: plain equality.
    let (prefix, rest) = match pattern.split_once('*') {
        Some(split) => split,
        None => return name == pattern,
    };

    // Text before the first `*` is anchored at the start of the name.
    let mut remaining = match name.strip_prefix(prefix) {
        Some(tail) => tail,
        None => return false,
    };

    // Text after the last `*` is the anchored suffix; anything between the
    // first and last `*` is a list of floating segments.
    let (floating, suffix) = match rest.rsplit_once('*') {
        Some((middle, tail)) => (middle, tail),
        None => ("", rest),
    };

    // Each floating segment must appear, in order, in what is left of the name.
    for segment in floating.split('*').filter(|s| !s.is_empty()) {
        match remaining.find(segment) {
            Some(at) => remaining = &remaining[at + segment.len()..],
            None => return false,
        }
    }

    // The suffix may only use characters not already consumed above.
    remaining.ends_with(suffix)
}
/// Reports whether `name` matches at least one of the exclude `patterns`.
/// An empty pattern list excludes nothing.
fn is_excluded(name: &str, patterns: &[String]) -> bool {
    for pattern in patterns {
        if matches_pattern(name, pattern) {
            return true;
        }
    }
    false
}
/// Applies the per-source filters to a repo listing, preserving order.
///
/// A repository is dropped when it is archived and `skip_archived` is set,
/// when it is a fork and `skip_forks` is set, or when its name matches one of
/// the source's `exclude` patterns.
fn filter_repos(repos: Vec<GitHubRepo>, source: &GitHubSource) -> Vec<GitHubRepo> {
    let mut kept = repos;
    kept.retain(|repo| {
        let dropped_as_archived = source.skip_archived && repo.archived;
        let dropped_as_fork = source.skip_forks && repo.fork;
        !(dropped_as_archived || dropped_as_fork || is_excluded(&repo.name, &source.exclude))
    });
    kept
}
async fn clone_repo(clone_url: &str, dest: &Path, token: &str) -> Result<()> {
let url_with_auth = clone_url.replacen("https://", &format!("https://x-access-token:{}@", token), 1);
let dest = dest.to_path_buf();
let url = url_with_auth.clone();
tokio::task::spawn_blocking(move || -> Result<()> {
if let Some(parent) = dest.parent() {
std::fs::create_dir_all(parent)
.with_context(|| format!("Failed to create directory {}", parent.display()))?;
}
let mut prepare = gix::prepare_clone(gix::url::parse(url.as_str().into())?, &dest)
.with_context(|| format!("Failed to prepare clone to {}", dest.display()))?;
let (mut checkout, _outcome) = prepare
.fetch_then_checkout(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)
.with_context(|| format!("Failed to fetch {}", dest.display()))?;
let (_repo, _outcome) = checkout
.main_worktree(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)
.with_context(|| format!("Failed to checkout {}", dest.display()))?;
Ok(())
})
.await
.context("Clone task panicked")?
}
pub async fn resolve_all_repos(
explicit: Vec<PathBuf>,
github_config: Option<&GitHubConfig>,
) -> Vec<PathBuf> {
let mut all_paths: Vec<PathBuf> = explicit;
let mut seen = HashSet::new();
let config = match github_config {
Some(c) if !c.sources.is_empty() => c,
_ => {
return all_paths;
}
};
let token = match resolve_token(config) {
Some(t) => t,
None => {
warn!("No GitHub token available — skipping repo discovery (set token_file or GITHUB_TOKEN)");
return all_paths;
}
};
let client = match GitHubClient::new(token.clone()) {
Ok(c) => c,
Err(e) => {
error!("Failed to create GitHub client: {}", e);
return all_paths;
}
};
for source in &config.sources {
info!(
"Discovering repos from {} {} (clone_base: {})",
match source.kind {
OwnerKind::Org => "org",
OwnerKind::User => "user",
},
source.owner,
source.clone_base.display()
);
let repos = match client.list_repos(source).await {
Ok(r) => r,
Err(e) => {
error!("Failed to list repos for {}: {}", source.owner, e);
continue;
}
};
let total = repos.len();
let filtered = filter_repos(repos, source);
info!(
"Found {} repos for {} ({} after filtering)",
total,
source.owner,
filtered.len()
);
let clone_base_str = source.clone_base.to_string_lossy();
let expanded = shellexpand::tilde(&clone_base_str);
let clone_base = PathBuf::from(expanded.as_ref());
for repo in &filtered {
let local_path = clone_base.join(&repo.name);
if local_path.exists() {
info!("Found local clone: {}", local_path.display());
all_paths.push(local_path);
} else if source.auto_clone {
info!("Cloning {} → {}", repo.name, local_path.display());
match clone_repo(&repo.clone_url, &local_path, &token).await {
Ok(()) => {
info!("Cloned {}", repo.name);
all_paths.push(local_path);
}
Err(e) => {
error!("Failed to clone {}: {}", repo.name, e);
}
}
} else {
info!("Skipping {} (not cloned, auto_clone=false)", repo.name);
}
}
}
all_paths.retain(|p| {
let key = p.canonicalize().unwrap_or_else(|_| p.clone());
seen.insert(key)
});
all_paths
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks every `(name, pattern, expected)` row against `matches_pattern`.
    fn check(cases: &[(&str, &str, bool)]) {
        for &(name, pattern, expected) in cases {
            assert_eq!(
                matches_pattern(name, pattern),
                expected,
                "name={:?} pattern={:?}",
                name,
                pattern
            );
        }
    }

    #[test]
    fn test_matches_pattern_exact() {
        check(&[("foo", "foo", true), ("foo", "bar", false)]);
    }

    #[test]
    fn test_matches_pattern_suffix_wildcard() {
        check(&[
            ("legacy-api", "legacy-*", true),
            ("legacy-", "legacy-*", true),
            ("new-api", "legacy-*", false),
        ]);
    }

    #[test]
    fn test_matches_pattern_prefix_wildcard() {
        check(&[
            ("repo.wiki", "*.wiki", true),
            (".wiki", "*.wiki", true),
            ("repo.git", "*.wiki", false),
        ]);
    }

    #[test]
    fn test_matches_pattern_middle_wildcard() {
        check(&[
            ("test-foo-old", "test-*-old", true),
            ("test--old", "test-*-old", true),
            ("test-foo-new", "test-*-old", false),
        ]);
    }

    #[test]
    fn test_matches_pattern_star_only() {
        check(&[("anything", "*", true), ("", "*", true)]);
    }

    #[test]
    fn test_is_excluded() {
        let patterns: Vec<String> = ["*.wiki", "legacy-*"]
            .iter()
            .map(|p| p.to_string())
            .collect();
        for &(name, expected) in &[
            ("repo.wiki", true),
            ("legacy-api", true),
            ("codesearch", false),
        ] {
            assert_eq!(is_excluded(name, &patterns), expected, "name={:?}", name);
        }
    }
}