1use std::collections::HashSet;
7use std::path::{Path, PathBuf};
8
9use anyhow::{Context, Result};
10use serde::Deserialize;
11use tracing::{error, info, warn};
12
13use super::{GitHubConfig, GitHubSource, OwnerKind};
14
15#[derive(Debug, Deserialize)]
17struct GitHubRepo {
18 name: String,
19 clone_url: String,
20 archived: bool,
21 fork: bool,
22}
23
24struct GitHubClient {
26 client: reqwest::Client,
27 token: String,
28}
29
30impl GitHubClient {
31 fn new(token: String) -> Result<Self> {
32 let client = reqwest::Client::builder()
33 .user_agent("codesearch-daemon")
34 .build()
35 .context("Failed to build HTTP client")?;
36 Ok(Self { client, token })
37 }
38
39 async fn list_repos(&self, source: &GitHubSource) -> Result<Vec<GitHubRepo>> {
41 let base_url = match source.kind {
42 OwnerKind::Org => format!("https://api.github.com/orgs/{}/repos", source.owner),
43 OwnerKind::User => format!("https://api.github.com/users/{}/repos", source.owner),
44 };
45
46 let mut all_repos = Vec::new();
47 let mut page = 1u32;
48
49 loop {
50 let resp = self
51 .client
52 .get(&base_url)
53 .query(&[
54 ("per_page", "100"),
55 ("page", &page.to_string()),
56 ])
57 .header("Authorization", format!("Bearer {}", self.token))
58 .header("X-GitHub-Api-Version", "2022-11-28")
59 .header("Accept", "application/vnd.github+json")
60 .send()
61 .await
62 .with_context(|| format!("GitHub API request failed (page {})", page))?;
63
64 if let Some(remaining) = resp
66 .headers()
67 .get("x-ratelimit-remaining")
68 .and_then(|v| v.to_str().ok())
69 .and_then(|v| v.parse::<u32>().ok())
70 {
71 if remaining == 0 {
72 warn!("GitHub API rate limit exhausted, stopping pagination");
73 break;
74 }
75 }
76
77 let status = resp.status();
78 if !status.is_success() {
79 let body = resp.text().await.unwrap_or_default();
80 return Err(anyhow::anyhow!(
81 "GitHub API returned {}: {}",
82 status,
83 body
84 ));
85 }
86
87 let repos: Vec<GitHubRepo> = resp
88 .json()
89 .await
90 .context("Failed to parse GitHub repo list")?;
91
92 let count = repos.len();
93 all_repos.extend(repos);
94
95 if count < 100 {
97 break;
98 }
99 page += 1;
100 }
101
102 Ok(all_repos)
103 }
104}
105
106fn resolve_token(config: &GitHubConfig) -> Option<String> {
108 if let Some(ref path) = config.token_file {
110 let expanded = shellexpand::tilde(path);
111 match std::fs::read_to_string(expanded.as_ref()) {
112 Ok(token) => {
113 let token = token.trim().to_string();
114 if !token.is_empty() {
115 return Some(token);
116 }
117 warn!("Token file {} is empty", path);
118 }
119 Err(e) => {
120 warn!("Failed to read token file {}: {}", path, e);
121 }
122 }
123 }
124
125 match std::env::var("GITHUB_TOKEN") {
127 Ok(token) if !token.is_empty() => Some(token),
128 _ => None,
129 }
130}
131
/// Tests `name` against a simple glob `pattern` in which `*` matches any
/// (possibly empty) run of characters.
///
/// A pattern without `*` must equal `name` exactly. Otherwise the text before
/// the first `*` must be a prefix of `name`, the text after the last `*` must
/// be a suffix of whatever remains, and every segment in between must occur,
/// in order, in the remaining text.
fn matches_pattern(name: &str, pattern: &str) -> bool {
    // No wildcard at all: plain equality.
    let (head, after_head) = match pattern.split_once('*') {
        Some(split) => split,
        None => return name == pattern,
    };

    // Anchor the leading literal; `rest` is the part of `name` still unmatched.
    let mut rest = match name.strip_prefix(head) {
        Some(rest) => rest,
        None => return false,
    };

    // Separate the trailing literal from the floating middle segments.
    let (middle, tail) = match after_head.rsplit_once('*') {
        Some((middle, tail)) => (middle, tail),
        None => ("", after_head),
    };

    // Each middle segment consumes up to and including its first occurrence.
    for segment in middle.split('*').filter(|s| !s.is_empty()) {
        match rest.find(segment) {
            Some(at) => rest = &rest[at + segment.len()..],
            None => return false,
        }
    }

    // An empty tail (pattern ends in `*`) matches trivially.
    rest.ends_with(tail)
}
174
175fn is_excluded(name: &str, patterns: &[String]) -> bool {
177 patterns.iter().any(|p| matches_pattern(name, p))
178}
179
180fn filter_repos(repos: Vec<GitHubRepo>, source: &GitHubSource) -> Vec<GitHubRepo> {
182 repos
183 .into_iter()
184 .filter(|r| {
185 if source.skip_archived && r.archived {
186 return false;
187 }
188 if source.skip_forks && r.fork {
189 return false;
190 }
191 if is_excluded(&r.name, &source.exclude) {
192 return false;
193 }
194 true
195 })
196 .collect()
197}
198
199async fn clone_repo(clone_url: &str, dest: &Path, token: &str) -> Result<()> {
201 let url_with_auth = clone_url.replacen("https://", &format!("https://x-access-token:{}@", token), 1);
202 let dest = dest.to_path_buf();
203 let url = url_with_auth.clone();
204
205 tokio::task::spawn_blocking(move || -> Result<()> {
206 if let Some(parent) = dest.parent() {
208 std::fs::create_dir_all(parent)
209 .with_context(|| format!("Failed to create directory {}", parent.display()))?;
210 }
211
212 let mut prepare = gix::prepare_clone(gix::url::parse(url.as_str().into())?, &dest)
213 .with_context(|| format!("Failed to prepare clone to {}", dest.display()))?;
214
215 let (mut checkout, _outcome) = prepare
216 .fetch_then_checkout(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)
217 .with_context(|| format!("Failed to fetch {}", dest.display()))?;
218
219 let (_repo, _outcome) = checkout
220 .main_worktree(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)
221 .with_context(|| format!("Failed to checkout {}", dest.display()))?;
222
223 Ok(())
224 })
225 .await
226 .context("Clone task panicked")?
227}
228
229pub async fn resolve_all_repos(
233 explicit: Vec<PathBuf>,
234 github_config: Option<&GitHubConfig>,
235) -> Vec<PathBuf> {
236 let mut all_paths: Vec<PathBuf> = explicit;
237 let mut seen = HashSet::new();
238
239 let config = match github_config {
240 Some(c) if !c.sources.is_empty() => c,
241 _ => {
242 return all_paths;
244 }
245 };
246
247 let token = match resolve_token(config) {
248 Some(t) => t,
249 None => {
250 warn!("No GitHub token available — skipping repo discovery (set token_file or GITHUB_TOKEN)");
251 return all_paths;
252 }
253 };
254
255 let client = match GitHubClient::new(token.clone()) {
256 Ok(c) => c,
257 Err(e) => {
258 error!("Failed to create GitHub client: {}", e);
259 return all_paths;
260 }
261 };
262
263 for source in &config.sources {
264 info!(
265 "Discovering repos from {} {} (clone_base: {})",
266 match source.kind {
267 OwnerKind::Org => "org",
268 OwnerKind::User => "user",
269 },
270 source.owner,
271 source.clone_base.display()
272 );
273
274 let repos = match client.list_repos(source).await {
275 Ok(r) => r,
276 Err(e) => {
277 error!("Failed to list repos for {}: {}", source.owner, e);
278 continue;
279 }
280 };
281
282 let total = repos.len();
283 let filtered = filter_repos(repos, source);
284 info!(
285 "Found {} repos for {} ({} after filtering)",
286 total,
287 source.owner,
288 filtered.len()
289 );
290
291 let clone_base_str = source.clone_base.to_string_lossy();
293 let expanded = shellexpand::tilde(&clone_base_str);
294 let clone_base = PathBuf::from(expanded.as_ref());
295
296 for repo in &filtered {
297 let local_path = clone_base.join(&repo.name);
298
299 if local_path.exists() {
300 info!("Found local clone: {}", local_path.display());
301 all_paths.push(local_path);
302 } else if source.auto_clone {
303 info!("Cloning {} → {}", repo.name, local_path.display());
304 match clone_repo(&repo.clone_url, &local_path, &token).await {
305 Ok(()) => {
306 info!("Cloned {}", repo.name);
307 all_paths.push(local_path);
308 }
309 Err(e) => {
310 error!("Failed to clone {}: {}", repo.name, e);
311 }
312 }
313 } else {
314 info!("Skipping {} (not cloned, auto_clone=false)", repo.name);
315 }
316 }
317 }
318
319 all_paths.retain(|p| {
321 let key = p.canonicalize().unwrap_or_else(|_| p.clone());
322 seen.insert(key)
323 });
324
325 all_paths
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `matches_pattern(name, pattern)` equals `expected` for
    /// every `(name, pattern, expected)` case.
    fn check_cases(cases: &[(&str, &str, bool)]) {
        for &(name, pattern, expected) in cases {
            assert_eq!(
                matches_pattern(name, pattern),
                expected,
                "name={:?} pattern={:?}",
                name,
                pattern
            );
        }
    }

    #[test]
    fn test_matches_pattern_exact() {
        check_cases(&[("foo", "foo", true), ("foo", "bar", false)]);
    }

    #[test]
    fn test_matches_pattern_suffix_wildcard() {
        check_cases(&[
            ("legacy-api", "legacy-*", true),
            ("legacy-", "legacy-*", true),
            ("new-api", "legacy-*", false),
        ]);
    }

    #[test]
    fn test_matches_pattern_prefix_wildcard() {
        check_cases(&[
            ("repo.wiki", "*.wiki", true),
            (".wiki", "*.wiki", true),
            ("repo.git", "*.wiki", false),
        ]);
    }

    #[test]
    fn test_matches_pattern_middle_wildcard() {
        check_cases(&[
            ("test-foo-old", "test-*-old", true),
            ("test--old", "test-*-old", true),
            ("test-foo-new", "test-*-old", false),
        ]);
    }

    #[test]
    fn test_matches_pattern_star_only() {
        check_cases(&[("anything", "*", true), ("", "*", true)]);
    }

    #[test]
    fn test_is_excluded() {
        let patterns = vec!["*.wiki".to_string(), "legacy-*".to_string()];
        let expectations = [
            ("repo.wiki", true),
            ("legacy-api", true),
            ("codesearch", false),
        ];
        for &(name, expected) in expectations.iter() {
            assert_eq!(is_excluded(name, &patterns), expected, "name={:?}", name);
        }
    }
}
372}