use chrono::Utc;
use log::{debug, info, warn};
use octocrab::models::issues::Issue;
use octocrab::{Octocrab, Page};
use tokio::time::{sleep, Duration};

use crate::config::{FetchConfig, GitHubConfig};
use crate::error::{GitHubFetchError, Result};
use crate::filters::{IssueFilters, IssueState};
use crate::types::{
    CollectionResult, GitHubComment, GitHubIssue, GitHubLabel, GitHubUser, PrFile, PrReview,
    PrReviewComment, Repository,
};

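/// GitHub API client that wraps `octocrab` and pauses for a configured delay
/// between requests to stay within rate limits.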
pub struct GitHubClient {
    octocrab: Octocrab,
    rate_limit_delay: Duration,
    #[allow(dead_code)]
    config: GitHubConfig,
}

impl GitHubClient {
    pub fn new() -> Result<Self> {
        Self::with_config(FetchConfig::default())
    }

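    /// Builds a client from the given [`FetchConfig`], reading the personal
    /// access token from the environment variable named in the configuration
    /// and honoring a custom API base URL when one is set.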
    pub fn with_config(config: FetchConfig) -> Result<Self> {
        let mut builder = Octocrab::builder();

        let token = std::env::var(&config.github.token_env_var).map_err(|_| {
            GitHubFetchError::AuthError(format!(
                "{} environment variable not set",
                config.github.token_env_var
            ))
        })?;

        builder = builder.personal_token(token);

        if !config.github.api_base_url.is_empty()
            && config.github.api_base_url != "https://api.github.com"
        {
            builder = builder
                .base_uri(&config.github.api_base_url)
                .map_err(|e| GitHubFetchError::ConfigError(format!("Invalid base URI: {}", e)))?;
        }

        let octocrab = builder.build()?;
        let rate_limit_delay = config.rate_limiting.delay_duration();

        Ok(Self {
            octocrab,
            rate_limit_delay,
            config: config.github,
        })
    }

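    /// Maps the crate's [`IssueState`] filter onto octocrab's `State` parameter;
    /// `All` maps to `None` so that no state filter is sent.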
    fn convert_state(state: &IssueState) -> Option<octocrab::params::State> {
        match state {
            IssueState::Open => Some(octocrab::params::State::Open),
            IssueState::Closed => Some(octocrab::params::State::Closed),
            IssueState::All => None,
        }
    }

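    /// Fetches issues from `repo` page by page, applying `filters` locally and
    /// stopping once `max_issues` (if given) is reached or no more pages remain.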
    pub async fn fetch_issues(
        &self,
        repo: &Repository,
        filters: &IssueFilters,
        max_issues: Option<usize>,
    ) -> Result<CollectionResult> {
        info!("Collecting issues from {}", repo.full_name);

        let mut all_issues = Vec::new();
        let mut page = 1u32;
        let per_page = 100u8;
        let mut collected_count = 0;

        loop {
            debug!("Fetching page {} for {}", page, repo.full_name);

            let issues_handler = self.octocrab.issues(&repo.owner, &repo.name);
            let mut list_builder = issues_handler
                .list()
                .sort(octocrab::params::issues::Sort::Updated)
                .direction(octocrab::params::Direction::Descending)
                .per_page(per_page)
                .page(page);

            if let Some(state) = Self::convert_state(&filters.state) {
                list_builder = list_builder.state(state);
            }

            if !filters.include_labels.is_empty() {
                list_builder = list_builder.labels(&filters.include_labels);
            }

            if let Some(date_range) = &filters.date_range {
                if let Some(since) = date_range.start {
                    list_builder = list_builder.since(since);
                }
            }

            let issues_page: Page<Issue> = list_builder.send().await.map_err(|e| {
                GitHubFetchError::ApiError(format!("Failed to fetch issues: {}", e))
            })?;

            // Pause between requests to respect the configured rate limit.
            sleep(self.rate_limit_delay).await;

            // An empty page means the results are exhausted.
            if issues_page.items.is_empty() {
                break;
            }

            for issue in issues_page.items {
                let github_issue = self.convert_issue(issue).await?;

                if filters.matches(&github_issue) {
                    all_issues.push(github_issue);
                    collected_count += 1;

                    if let Some(max) = max_issues {
                        if collected_count >= max {
                            info!("Reached maximum issue limit: {}", max);
                            break;
                        }
                    }
                }
            }

            if let Some(max) = max_issues {
                if collected_count >= max {
                    break;
                }
            }

            page += 1;

            // Safety cap to avoid paginating forever on very large repositories.
            if page > 100 {
                warn!("Reached maximum page limit (100) for {}", repo.full_name);
                break;
            }
        }

        info!(
            "Collected {} issues from {}",
            all_issues.len(),
            repo.full_name
        );

        Ok(CollectionResult {
            repository: repo.clone(),
            issues: all_issues,
            total_collected: collected_count,
            collection_time: Utc::now(),
            filters_applied: self.describe_filters(filters),
        })
    }

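    /// Fetches a single issue by number.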
    pub async fn fetch_issue(&self, repo: &Repository, issue_number: u64) -> Result<GitHubIssue> {
        sleep(self.rate_limit_delay).await;

        let issue = self
            .octocrab
            .issues(&repo.owner, &repo.name)
            .get(issue_number)
            .await
            .map_err(|e| {
                GitHubFetchError::NotFound(format!("Issue #{} not found: {}", issue_number, e))
            })?;

        self.convert_issue(issue).await
    }

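    /// Fetches a single pull request by number and converts it into the shared
    /// [`GitHubIssue`] representation.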
    pub async fn fetch_pr(&self, repo: &Repository, pr_number: u64) -> Result<GitHubIssue> {
        sleep(self.rate_limit_delay).await;

        let pr = self
            .octocrab
            .pulls(&repo.owner, &repo.name)
            .get(pr_number)
            .await
            .map_err(|e| {
                GitHubFetchError::NotFound(format!("PR #{} not found: {}", pr_number, e))
            })?;

        // A merged PR may not carry an explicit closed_at, so fall back to merged_at.
        let merged_at = pr.merged_at;
        let closed_at = pr.closed_at.or(merged_at);

        Ok(GitHubIssue {
            id: pr.id.0,
            number: pr.number,
            title: pr.title.unwrap_or_default(),
            body: pr.body,
            state: pr
                .state
                .map(|s| format!("{:?}", s))
                .unwrap_or_else(|| "open".to_string()),
            labels: pr
                .labels
                .unwrap_or_default()
                .into_iter()
                .map(|label| GitHubLabel {
                    id: label.id.0,
                    name: label.name,
                    color: label.color,
                    description: label.description,
                })
                .collect(),
            user: if let Some(user) = pr.user {
                GitHubUser {
                    id: user.id.0,
                    login: user.login,
                    avatar_url: user.avatar_url.to_string(),
                }
            } else {
                GitHubUser {
                    id: 0,
                    login: "unknown".to_string(),
                    avatar_url: "".to_string(),
                }
            },
            assignees: pr
                .assignees
                .unwrap_or_default()
                .into_iter()
                .map(|assignee| GitHubUser {
                    id: assignee.id.0,
                    login: assignee.login,
                    avatar_url: assignee.avatar_url.to_string(),
                })
                .collect(),
            created_at: pr.created_at.unwrap_or_else(Utc::now),
            updated_at: pr.updated_at.unwrap_or_else(Utc::now),
            closed_at,
            merged_at,
            html_url: pr.html_url.map(|url| url.to_string()).unwrap_or_default(),
            is_pull_request: true,
            comments: pr.comments.unwrap_or(0) as u32,
        })
    }

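    /// Fetches all comments on an issue or pull request, paginating until an
    /// empty page is returned.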
    pub async fn fetch_comments(
        &self,
        repo: &Repository,
        issue_number: u64,
    ) -> Result<Vec<GitHubComment>> {
        debug!(
            "Fetching comments for issue #{} in {}",
            issue_number, repo.full_name
        );

        let mut comments = Vec::new();
        let mut page = 1u32;

        loop {
            let comments_page = self
                .octocrab
                .issues(&repo.owner, &repo.name)
                .list_comments(issue_number)
                .per_page(100)
                .page(page)
                .send()
                .await
                .map_err(|e| {
                    GitHubFetchError::ApiError(format!("Failed to fetch comments: {}", e))
                })?;

            sleep(self.rate_limit_delay).await;

            if comments_page.items.is_empty() {
                break;
            }

            for comment in comments_page.items {
                comments.push(GitHubComment {
                    id: comment.id.0,
                    user: GitHubUser {
                        id: comment.user.id.0,
                        login: comment.user.login,
                        avatar_url: comment.user.avatar_url.to_string(),
                    },
                    body: comment.body.unwrap_or_default(),
                    created_at: comment.created_at,
                    updated_at: comment.updated_at.unwrap_or(comment.created_at),
                    html_url: comment.html_url.to_string(),
                });
            }

            page += 1;
        }

        Ok(comments)
    }

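    /// Fetches the list of files changed in a pull request.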
    pub async fn fetch_pr_files(&self, repo: &Repository, pr_number: u64) -> Result<Vec<PrFile>> {
        sleep(self.rate_limit_delay).await;

        let files = self
            .octocrab
            .pulls(&repo.owner, &repo.name)
            .list_files(pr_number)
            .await
            .map_err(|e| GitHubFetchError::ApiError(format!("Failed to fetch PR files: {}", e)))?;

        Ok(files
            .items
            .into_iter()
            .map(|file| PrFile {
                filename: file.filename,
                status: format!("{:?}", file.status),
                additions: file.additions as u32,
                deletions: file.deletions as u32,
                changes: file.changes as u32,
                patch: file.patch,
            })
            .collect())
    }

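    /// Fetches the reviews submitted on a pull request.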
    pub async fn fetch_pr_reviews(
        &self,
        repo: &Repository,
        pr_number: u64,
    ) -> Result<Vec<PrReview>> {
        debug!(
            "Fetching reviews for PR #{} in {}",
            pr_number, repo.full_name
        );

        sleep(self.rate_limit_delay).await;

        let reviews = self
            .octocrab
            .pulls(&repo.owner, &repo.name)
            .list_reviews(pr_number)
            .send()
            .await
            .map_err(|e| GitHubFetchError::ApiError(format!("Failed to fetch PR reviews: {}", e)))?;

        Ok(reviews
            .items
            .into_iter()
            .map(|review| PrReview {
                id: review.id.0,
                user: GitHubUser {
                    id: review.user.as_ref().map(|u| u.id.0).unwrap_or(0),
                    login: review
                        .user
                        .as_ref()
                        .map(|u| u.login.clone())
                        .unwrap_or_else(|| "unknown".to_string()),
                    avatar_url: review
                        .user
                        .as_ref()
                        .map(|u| u.avatar_url.to_string())
                        .unwrap_or_default(),
                },
                body: review.body,
                state: review
                    .state
                    .map(|s| format!("{:?}", s))
                    .unwrap_or_else(|| "UNKNOWN".to_string()),
                submitted_at: review.submitted_at,
                html_url: review.html_url.to_string(),
                commit_id: review.commit_id,
            })
            .collect())
    }

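    /// Fetches inline review comments on a pull request, paginating the REST
    /// endpoint directly and skipping any comments that fail to parse.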
    pub async fn fetch_pr_review_comments(
        &self,
        repo: &Repository,
        pr_number: u64,
    ) -> Result<Vec<PrReviewComment>> {
        debug!(
            "Fetching review comments for PR #{} in {}",
            pr_number, repo.full_name
        );

        let mut comments = Vec::new();
        let mut page = 1u32;

        loop {
            sleep(self.rate_limit_delay).await;

            // Query the pull request review comments endpoint directly and
            // deserialize the raw JSON payload page by page.
            let url = format!(
                "/repos/{}/{}/pulls/{}/comments?per_page=100&page={}",
                repo.owner, repo.name, pr_number, page
            );

            let response: Vec<serde_json::Value> = self
                .octocrab
                .get(&url, None::<&()>)
                .await
                .map_err(|e| {
                    GitHubFetchError::ApiError(format!("Failed to fetch review comments: {}", e))
                })?;

            if response.is_empty() {
                break;
            }

            for comment in response {
                if let Some(parsed) = self.parse_review_comment(&comment) {
                    comments.push(parsed);
                }
            }

            page += 1;
        }

        Ok(comments)
    }

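    /// Parses one review comment from its raw JSON representation, returning
    /// `None` if any required field is missing.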
    fn parse_review_comment(&self, comment: &serde_json::Value) -> Option<PrReviewComment> {
        let user = comment.get("user")?;

        Some(PrReviewComment {
            id: comment.get("id")?.as_u64()?,
            review_id: comment
                .get("pull_request_review_id")
                .and_then(|v| v.as_u64()),
            user: GitHubUser {
                id: user.get("id")?.as_u64()?,
                login: user.get("login")?.as_str()?.to_string(),
                avatar_url: user.get("avatar_url")?.as_str()?.to_string(),
            },
            body: comment.get("body")?.as_str()?.to_string(),
            path: comment.get("path")?.as_str()?.to_string(),
            line: comment.get("line").and_then(|v| v.as_u64()).map(|v| v as u32),
            original_line: comment
                .get("original_line")
                .and_then(|v| v.as_u64())
                .map(|v| v as u32),
            diff_hunk: comment
                .get("diff_hunk")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string(),
            side: comment
                .get("side")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string()),
            commit_id: comment
                .get("commit_id")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string()),
            created_at: comment
                .get("created_at")?
                .as_str()?
                .parse()
                .ok()?,
            updated_at: comment
                .get("updated_at")?
                .as_str()?
                .parse()
                .ok()?,
            html_url: comment.get("html_url")?.as_str()?.to_string(),
            position: comment
                .get("position")
                .and_then(|v| v.as_u64())
                .map(|v| v as u32),
            in_reply_to_id: comment.get("in_reply_to_id").and_then(|v| v.as_u64()),
        })
    }

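    /// Verifies that the API is reachable and the token is accepted by
    /// querying the rate limit endpoint.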
    pub async fn test_connection(&self) -> Result<()> {
        debug!("Testing GitHub API connection");

        self.octocrab
            .ratelimit()
            .get()
            .await
            .map_err(|e| GitHubFetchError::ApiError(format!("Connection test failed: {}", e)))?;

        info!("GitHub API connection successful");
        Ok(())
    }

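    /// Returns a human-readable summary of the current core rate limit.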
    pub async fn get_rate_limit(&self) -> Result<String> {
        let rate_limit =
            self.octocrab.ratelimit().get().await.map_err(|e| {
                GitHubFetchError::ApiError(format!("Failed to get rate limit: {}", e))
            })?;

        Ok(format!(
            "Rate limit: {}/{} remaining, resets at {}",
            rate_limit.resources.core.remaining,
            rate_limit.resources.core.limit,
            rate_limit.resources.core.reset
        ))
    }

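    /// Converts an octocrab [`Issue`] into the crate's [`GitHubIssue`] type,
    /// fetching the merge timestamp separately when the issue is a pull request.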
    async fn convert_issue(&self, issue: Issue) -> Result<GitHubIssue> {
        let is_pull_request = issue.pull_request.is_some();

        let merged_at = if is_pull_request {
            self.get_pr_merged_at(&issue).await?
        } else {
            None
        };

        Ok(GitHubIssue {
            id: issue.id.0,
            number: issue.number,
            title: issue.title,
            body: issue.body,
            state: format!("{:?}", issue.state),
            labels: issue
                .labels
                .into_iter()
                .map(|label| GitHubLabel {
                    id: label.id.0,
                    name: label.name,
                    color: label.color,
                    description: label.description,
                })
                .collect(),
            user: GitHubUser {
                id: issue.user.id.0,
                login: issue.user.login,
                avatar_url: issue.user.avatar_url.to_string(),
            },
            assignees: issue
                .assignees
                .into_iter()
                .map(|assignee| GitHubUser {
                    id: assignee.id.0,
                    login: assignee.login,
                    avatar_url: assignee.avatar_url.to_string(),
                })
                .collect(),
            created_at: issue.created_at,
            updated_at: issue.updated_at,
            closed_at: issue.closed_at,
            merged_at,
            html_url: issue.html_url.to_string(),
            is_pull_request,
            comments: issue.comments,
        })
    }

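    /// Looks up the `merged_at` timestamp for an issue that is actually a pull
    /// request, deriving the owner and repository name from its repository URL.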
    async fn get_pr_merged_at(
        &self,
        issue: &Issue,
    ) -> Result<Option<chrono::DateTime<chrono::Utc>>> {
        if let Some(ref _pr_url) = issue.pull_request {
            // Derive the owner and repository name from the repository URL so the
            // pull request can be fetched to read its merged_at timestamp.
            let repo_url_str = issue.repository_url.to_string();
            let parts: Vec<&str> = repo_url_str.trim_end_matches('/').split('/').collect();

            if parts.len() >= 2 {
                let owner = parts[parts.len() - 2];
                let repo = parts[parts.len() - 1];

                match self.octocrab.pulls(owner, repo).get(issue.number).await {
                    Ok(pr) => {
                        sleep(self.rate_limit_delay).await;
                        Ok(pr.merged_at)
                    }
                    Err(e) => {
                        // A failed lookup is not fatal; the issue is still usable.
                        warn!("Failed to fetch PR #{} merged_at: {}", issue.number, e);
                        Ok(None)
                    }
                }
            } else {
                Ok(None)
            }
        } else {
            Ok(None)
        }
    }

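    /// Produces a textual description of the filters applied, for inclusion in
    /// the [`CollectionResult`].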
    fn describe_filters(&self, filters: &IssueFilters) -> Vec<String> {
        let mut descriptions = Vec::new();

        if !filters.include_labels.is_empty() {
            descriptions.push(format!("include_labels: {:?}", filters.include_labels));
        }
        if !filters.exclude_labels.is_empty() {
            descriptions.push(format!("exclude_labels: {:?}", filters.exclude_labels));
        }
        if filters.rust_errors_only {
            descriptions.push("rust_errors_only: true".to_string());
        }
        if filters.code_blocks_only {
            descriptions.push("code_blocks_only: true".to_string());
        }
        if let Some(min_length) = filters.min_body_length {
            descriptions.push(format!("min_body_length: {}", min_length));
        }
        if !filters.include_pull_requests {
            descriptions.push("exclude_pull_requests: true".to_string());
        }
        if let Some(min_comments) = filters.min_comments {
            descriptions.push(format!("min_comments: {}", min_comments));
        }

        descriptions
    }
}