github_fetch/
client.rs

1use chrono::Utc;
2use log::{debug, info, warn};
3use octocrab::models::issues::Issue;
4use octocrab::{Octocrab, Page};
5use tokio::time::{sleep, Duration};
6
7use crate::config::{FetchConfig, GitHubConfig};
8use crate::error::{GitHubFetchError, Result};
9use crate::filters::{IssueFilters, IssueState};
10use crate::types::{
11    CollectionResult, GitHubComment, GitHubIssue, GitHubLabel, GitHubUser, PrFile, PrReview,
12    PrReviewComment, Repository,
13};
14
15pub struct GitHubClient {
16    octocrab: Octocrab,
17    rate_limit_delay: Duration,
18    #[allow(dead_code)]
19    config: GitHubConfig,
20}
21
22impl GitHubClient {
23    pub fn new() -> Result<Self> {
24        Self::with_config(FetchConfig::default())
25    }
26
27    pub fn with_config(config: FetchConfig) -> Result<Self> {
28        let mut builder = Octocrab::builder();
29
30        let token = std::env::var(&config.github.token_env_var).map_err(|_| {
31            GitHubFetchError::AuthError(format!(
32                "{} environment variable not set",
33                config.github.token_env_var
34            ))
35        })?;
36
37        builder = builder.personal_token(token);
38
39        if !config.github.api_base_url.is_empty()
40            && config.github.api_base_url != "https://api.github.com"
41        {
42            builder = builder
43                .base_uri(&config.github.api_base_url)
44                .map_err(|e| GitHubFetchError::ConfigError(format!("Invalid base URI: {}", e)))?;
45        }
46
47        let octocrab = builder.build()?;
48        let rate_limit_delay = config.rate_limiting.delay_duration();
49
50        Ok(Self {
51            octocrab,
52            rate_limit_delay,
53            config: config.github,
54        })
55    }
56
57    fn convert_state(state: &IssueState) -> Option<octocrab::params::State> {
58        match state {
59            IssueState::Open => Some(octocrab::params::State::Open),
60            IssueState::Closed => Some(octocrab::params::State::Closed),
61            IssueState::All => None,
62        }
63    }
64
65    pub async fn fetch_issues(
66        &self,
67        repo: &Repository,
68        filters: &IssueFilters,
69        max_issues: Option<usize>,
70    ) -> Result<CollectionResult> {
71        info!("Collecting issues from {}", repo.full_name);
72
73        let mut all_issues = Vec::new();
74        let mut page = 1u32;
75        let per_page = 100u8;
76        let mut collected_count = 0;
77
78        loop {
79            debug!("Fetching page {} for {}", page, repo.full_name);
80
81            let issues_handler = self.octocrab.issues(&repo.owner, &repo.name);
82            let mut list_builder = issues_handler
83                .list()
84                .sort(octocrab::params::issues::Sort::Updated)
85                .direction(octocrab::params::Direction::Descending)
86                .per_page(per_page)
87                .page(page);
88
89            if let Some(state) = Self::convert_state(&filters.state) {
90                list_builder = list_builder.state(state);
91            }
92
93            if !filters.include_labels.is_empty() {
94                list_builder = list_builder.labels(&filters.include_labels);
95            }
96
97            if let Some(date_range) = &filters.date_range {
98                if let Some(since) = date_range.start {
99                    list_builder = list_builder.since(since);
100                }
101            }
102
103            let issues_page: Page<Issue> = list_builder.send().await.map_err(|e| {
104                GitHubFetchError::ApiError(format!("Failed to fetch issues: {}", e))
105            })?;
106
107            sleep(self.rate_limit_delay).await;
108
109            if issues_page.items.is_empty() {
110                break;
111            }
112
113            for issue in issues_page.items {
114                let github_issue = self.convert_issue(issue).await?;
115
116                if filters.matches(&github_issue) {
117                    all_issues.push(github_issue);
118                    collected_count += 1;
119
120                    if let Some(max) = max_issues {
121                        if collected_count >= max {
122                            info!("Reached maximum issue limit: {}", max);
123                            break;
124                        }
125                    }
126                }
127            }
128
129            if let Some(max) = max_issues {
130                if collected_count >= max {
131                    break;
132                }
133            }
134
135            page += 1;
136
137            if page > 100 {
138                warn!("Reached maximum page limit (100) for {}", repo.full_name);
139                break;
140            }
141        }
142
143        info!(
144            "Collected {} issues from {}",
145            all_issues.len(),
146            repo.full_name
147        );
148
149        Ok(CollectionResult {
150            repository: repo.clone(),
151            issues: all_issues,
152            total_collected: collected_count,
153            collection_time: Utc::now(),
154            filters_applied: self.describe_filters(filters),
155        })
156    }
157
158    pub async fn fetch_issue(&self, repo: &Repository, issue_number: u64) -> Result<GitHubIssue> {
159        sleep(self.rate_limit_delay).await;
160
161        let issue = self
162            .octocrab
163            .issues(&repo.owner, &repo.name)
164            .get(issue_number)
165            .await
166            .map_err(|e| {
167                GitHubFetchError::NotFound(format!("Issue #{} not found: {}", issue_number, e))
168            })?;
169
170        self.convert_issue(issue).await
171    }
172
173    pub async fn fetch_pr(&self, repo: &Repository, pr_number: u64) -> Result<GitHubIssue> {
174        sleep(self.rate_limit_delay).await;
175
176        let pr = self
177            .octocrab
178            .pulls(&repo.owner, &repo.name)
179            .get(pr_number)
180            .await
181            .map_err(|e| {
182                GitHubFetchError::NotFound(format!("PR #{} not found: {}", pr_number, e))
183            })?;
184
185        let merged_at = pr.merged_at;
186        let closed_at = pr.closed_at.or(merged_at);
187
188        Ok(GitHubIssue {
189            id: pr.id.0,
190            number: pr.number,
191            title: pr.title.unwrap_or_default(),
192            body: pr.body,
193            state: pr
194                .state
195                .map(|s| format!("{:?}", s))
196                .unwrap_or_else(|| "open".to_string()),
197            labels: pr
198                .labels
199                .unwrap_or_default()
200                .into_iter()
201                .map(|label| GitHubLabel {
202                    id: label.id.0,
203                    name: label.name,
204                    color: label.color,
205                    description: label.description,
206                })
207                .collect(),
208            user: if let Some(user) = pr.user {
209                GitHubUser {
210                    id: user.id.0,
211                    login: user.login,
212                    avatar_url: user.avatar_url.to_string(),
213                }
214            } else {
215                GitHubUser {
216                    id: 0,
217                    login: "unknown".to_string(),
218                    avatar_url: "".to_string(),
219                }
220            },
221            assignees: pr
222                .assignees
223                .unwrap_or_default()
224                .into_iter()
225                .map(|assignee| GitHubUser {
226                    id: assignee.id.0,
227                    login: assignee.login,
228                    avatar_url: assignee.avatar_url.to_string(),
229                })
230                .collect(),
231            created_at: pr.created_at.unwrap_or_else(|| Utc::now()),
232            updated_at: pr.updated_at.unwrap_or_else(|| Utc::now()),
233            closed_at,
234            merged_at,
235            html_url: pr.html_url.map(|url| url.to_string()).unwrap_or_default(),
236            is_pull_request: true,
237            comments: pr.comments.unwrap_or(0) as u32,
238        })
239    }
240
241    pub async fn fetch_comments(
242        &self,
243        repo: &Repository,
244        issue_number: u64,
245    ) -> Result<Vec<GitHubComment>> {
246        debug!(
247            "Fetching comments for issue #{} in {}",
248            issue_number, repo.full_name
249        );
250
251        let mut comments = Vec::new();
252        let mut page = 1u32;
253
254        loop {
255            let comments_page = self
256                .octocrab
257                .issues(&repo.owner, &repo.name)
258                .list_comments(issue_number)
259                .per_page(100)
260                .page(page)
261                .send()
262                .await
263                .map_err(|e| {
264                    GitHubFetchError::ApiError(format!("Failed to fetch comments: {}", e))
265                })?;
266
267            sleep(self.rate_limit_delay).await;
268
269            if comments_page.items.is_empty() {
270                break;
271            }
272
273            for comment in comments_page.items {
274                comments.push(GitHubComment {
275                    id: comment.id.0,
276                    user: GitHubUser {
277                        id: comment.user.id.0,
278                        login: comment.user.login,
279                        avatar_url: comment.user.avatar_url.to_string(),
280                    },
281                    body: comment.body.unwrap_or_default(),
282                    created_at: comment.created_at,
283                    updated_at: comment.updated_at.unwrap_or(comment.created_at),
284                    html_url: comment.html_url.to_string(),
285                });
286            }
287
288            page += 1;
289        }
290
291        Ok(comments)
292    }
293
294    pub async fn fetch_pr_files(&self, repo: &Repository, pr_number: u64) -> Result<Vec<PrFile>> {
295        sleep(self.rate_limit_delay).await;
296
297        let files = self
298            .octocrab
299            .pulls(&repo.owner, &repo.name)
300            .list_files(pr_number)
301            .await
302            .map_err(|e| GitHubFetchError::ApiError(format!("Failed to fetch PR files: {}", e)))?;
303
304        Ok(files
305            .items
306            .into_iter()
307            .map(|file| PrFile {
308                filename: file.filename,
309                status: format!("{:?}", file.status),
310                additions: file.additions as u32,
311                deletions: file.deletions as u32,
312                changes: file.changes as u32,
313                patch: file.patch,
314            })
315            .collect())
316    }
317
318    /// Fetch all reviews for a PR
319    pub async fn fetch_pr_reviews(
320        &self,
321        repo: &Repository,
322        pr_number: u64,
323    ) -> Result<Vec<PrReview>> {
324        debug!(
325            "Fetching reviews for PR #{} in {}",
326            pr_number, repo.full_name
327        );
328
329        sleep(self.rate_limit_delay).await;
330
331        let reviews = self
332            .octocrab
333            .pulls(&repo.owner, &repo.name)
334            .list_reviews(pr_number)
335            .send()
336            .await
337            .map_err(|e| GitHubFetchError::ApiError(format!("Failed to fetch PR reviews: {}", e)))?;
338
339        Ok(reviews
340            .items
341            .into_iter()
342            .map(|review| PrReview {
343                id: review.id.0,
344                user: GitHubUser {
345                    id: review.user.as_ref().map(|u| u.id.0).unwrap_or(0),
346                    login: review
347                        .user
348                        .as_ref()
349                        .map(|u| u.login.clone())
350                        .unwrap_or_else(|| "unknown".to_string()),
351                    avatar_url: review
352                        .user
353                        .as_ref()
354                        .map(|u| u.avatar_url.to_string())
355                        .unwrap_or_default(),
356                },
357                body: review.body,
358                state: review
359                    .state
360                    .map(|s| format!("{:?}", s))
361                    .unwrap_or_else(|| "UNKNOWN".to_string()),
362                submitted_at: review.submitted_at,
363                html_url: review.html_url.to_string(),
364                commit_id: review.commit_id,
365            })
366            .collect())
367    }
368
369    /// Fetch all review comments (inline comments on diff) for a PR
370    pub async fn fetch_pr_review_comments(
371        &self,
372        repo: &Repository,
373        pr_number: u64,
374    ) -> Result<Vec<PrReviewComment>> {
375        debug!(
376            "Fetching review comments for PR #{} in {}",
377            pr_number, repo.full_name
378        );
379
380        let mut comments = Vec::new();
381        let mut page = 1u32;
382
383        loop {
384            sleep(self.rate_limit_delay).await;
385
386            let url = format!(
387                "/repos/{}/{}/pulls/{}/comments?per_page=100&page={}",
388                repo.owner, repo.name, pr_number, page
389            );
390
391            let response: Vec<serde_json::Value> = self
392                .octocrab
393                .get(&url, None::<&()>)
394                .await
395                .map_err(|e| {
396                    GitHubFetchError::ApiError(format!("Failed to fetch review comments: {}", e))
397                })?;
398
399            if response.is_empty() {
400                break;
401            }
402
403            for comment in response {
404                if let Some(parsed) = self.parse_review_comment(&comment) {
405                    comments.push(parsed);
406                }
407            }
408
409            page += 1;
410        }
411
412        Ok(comments)
413    }
414
415    fn parse_review_comment(&self, comment: &serde_json::Value) -> Option<PrReviewComment> {
416        let user = comment.get("user")?;
417
418        Some(PrReviewComment {
419            id: comment.get("id")?.as_u64()?,
420            review_id: comment
421                .get("pull_request_review_id")
422                .and_then(|v| v.as_u64()),
423            user: GitHubUser {
424                id: user.get("id")?.as_u64()?,
425                login: user.get("login")?.as_str()?.to_string(),
426                avatar_url: user.get("avatar_url")?.as_str()?.to_string(),
427            },
428            body: comment.get("body")?.as_str()?.to_string(),
429            path: comment.get("path")?.as_str()?.to_string(),
430            line: comment.get("line").and_then(|v| v.as_u64()).map(|v| v as u32),
431            original_line: comment
432                .get("original_line")
433                .and_then(|v| v.as_u64())
434                .map(|v| v as u32),
435            diff_hunk: comment
436                .get("diff_hunk")
437                .and_then(|v| v.as_str())
438                .unwrap_or("")
439                .to_string(),
440            side: comment
441                .get("side")
442                .and_then(|v| v.as_str())
443                .map(|s| s.to_string()),
444            commit_id: comment
445                .get("commit_id")
446                .and_then(|v| v.as_str())
447                .map(|s| s.to_string()),
448            created_at: comment
449                .get("created_at")?
450                .as_str()?
451                .parse()
452                .ok()?,
453            updated_at: comment
454                .get("updated_at")?
455                .as_str()?
456                .parse()
457                .ok()?,
458            html_url: comment.get("html_url")?.as_str()?.to_string(),
459            position: comment
460                .get("position")
461                .and_then(|v| v.as_u64())
462                .map(|v| v as u32),
463            in_reply_to_id: comment.get("in_reply_to_id").and_then(|v| v.as_u64()),
464        })
465    }
466
467    pub async fn test_connection(&self) -> Result<()> {
468        debug!("Testing GitHub API connection");
469
470        self.octocrab
471            .ratelimit()
472            .get()
473            .await
474            .map_err(|e| GitHubFetchError::ApiError(format!("Connection test failed: {}", e)))?;
475
476        info!("GitHub API connection successful");
477        Ok(())
478    }
479
480    pub async fn get_rate_limit(&self) -> Result<String> {
481        let rate_limit =
482            self.octocrab.ratelimit().get().await.map_err(|e| {
483                GitHubFetchError::ApiError(format!("Failed to get rate limit: {}", e))
484            })?;
485
486        Ok(format!(
487            "Rate limit: {}/{} remaining, resets at {}",
488            rate_limit.resources.core.remaining,
489            rate_limit.resources.core.limit,
490            rate_limit.resources.core.reset
491        ))
492    }
493
494    async fn convert_issue(&self, issue: Issue) -> Result<GitHubIssue> {
495        let is_pull_request = issue.pull_request.is_some();
496
497        let merged_at = if is_pull_request {
498            self.get_pr_merged_at(&issue).await?
499        } else {
500            None
501        };
502
503        Ok(GitHubIssue {
504            id: issue.id.0,
505            number: issue.number,
506            title: issue.title,
507            body: issue.body,
508            state: format!("{:?}", issue.state),
509            labels: issue
510                .labels
511                .into_iter()
512                .map(|label| GitHubLabel {
513                    id: label.id.0,
514                    name: label.name,
515                    color: label.color,
516                    description: label.description,
517                })
518                .collect(),
519            user: GitHubUser {
520                id: issue.user.id.0,
521                login: issue.user.login,
522                avatar_url: issue.user.avatar_url.to_string(),
523            },
524            assignees: issue
525                .assignees
526                .into_iter()
527                .map(|assignee| GitHubUser {
528                    id: assignee.id.0,
529                    login: assignee.login,
530                    avatar_url: assignee.avatar_url.to_string(),
531                })
532                .collect(),
533            created_at: issue.created_at,
534            updated_at: issue.updated_at,
535            closed_at: issue.closed_at,
536            merged_at,
537            html_url: issue.html_url.to_string(),
538            is_pull_request,
539            comments: issue.comments,
540        })
541    }
542
543    async fn get_pr_merged_at(
544        &self,
545        issue: &Issue,
546    ) -> Result<Option<chrono::DateTime<chrono::Utc>>> {
547        if let Some(ref _pr_url) = issue.pull_request {
548            let repo_url_str = issue.repository_url.to_string();
549            let parts: Vec<&str> = repo_url_str.trim_end_matches('/').split('/').collect();
550
551            if parts.len() >= 2 {
552                let owner = parts[parts.len() - 2];
553                let repo = parts[parts.len() - 1];
554
555                match self.octocrab.pulls(owner, repo).get(issue.number).await {
556                    Ok(pr) => {
557                        sleep(self.rate_limit_delay).await;
558                        Ok(pr.merged_at)
559                    }
560                    Err(e) => {
561                        warn!("Failed to fetch PR #{} merged_at: {}", issue.number, e);
562                        Ok(None)
563                    }
564                }
565            } else {
566                Ok(None)
567            }
568        } else {
569            Ok(None)
570        }
571    }
572
573    fn describe_filters(&self, filters: &IssueFilters) -> Vec<String> {
574        let mut descriptions = Vec::new();
575
576        if !filters.include_labels.is_empty() {
577            descriptions.push(format!("include_labels: {:?}", filters.include_labels));
578        }
579        if !filters.exclude_labels.is_empty() {
580            descriptions.push(format!("exclude_labels: {:?}", filters.exclude_labels));
581        }
582        if filters.rust_errors_only {
583            descriptions.push("rust_errors_only: true".to_string());
584        }
585        if filters.code_blocks_only {
586            descriptions.push("code_blocks_only: true".to_string());
587        }
588        if let Some(min_length) = filters.min_body_length {
589            descriptions.push(format!("min_body_length: {}", min_length));
590        }
591        if !filters.include_pull_requests {
592            descriptions.push("exclude_pull_requests: true".to_string());
593        }
594        if let Some(min_comments) = filters.min_comments {
595            descriptions.push(format!("min_comments: {}", min_comments));
596        }
597
598        descriptions
599    }
600}