Skip to main content

pr_bro/github/
search.rs

1use anyhow::{anyhow, Context, Result};
2use futures::stream::{FuturesUnordered, StreamExt};
3use octocrab::Octocrab;
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::Arc;
6
7use crate::github::types::PullRequest;
8
9/// Search GitHub for pull requests matching the given query.
10/// Auth errors (401 / Bad credentials) fail immediately as a typed AuthError.
11/// Rate limit and permission errors also fail immediately.
12/// Transient/network errors are retried up to 3 times with exponential backoff.
13pub async fn search_prs(client: &Octocrab, query: &str) -> Result<Vec<PullRequest>> {
14    // Ensure the query only returns PRs, not issues
15    let query = if query.contains("is:pr") {
16        query.to_string()
17    } else {
18        format!("{} is:pr", query)
19    };
20
21    let max_retries = 3;
22    let mut attempt = 0;
23
24    loop {
25        attempt += 1;
26        match client
27            .search()
28            .issues_and_pull_requests(&query)
29            .send()
30            .await
31        {
32            Ok(results) => {
33                let prs: Vec<PullRequest> = results
34                    .items
35                    .into_iter()
36                    .filter(|issue| issue.pull_request.is_some()) // Only PRs, not issues
37                    .map(|issue| {
38                        // Extract owner/repo from html_url
39                        // Format: "https://github.com/owner/repo/pull/123"
40                        let path = issue.html_url.path();
41                        let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
42                        let repo = if parts.len() >= 2 {
43                            format!("{}/{}", parts[0], parts[1])
44                        } else {
45                            "unknown/unknown".to_string()
46                        };
47
48                        PullRequest {
49                            title: issue.title,
50                            number: issue.number,
51                            author: issue.user.login.clone(),
52                            repo,
53                            url: issue.html_url.to_string(),
54                            created_at: issue.created_at,
55                            updated_at: issue.updated_at,
56                            additions: 0, // Search API doesn't include these
57                            deletions: 0, // Will be populated by enrichment
58                            approvals: 0, // Requires separate API call
59                            draft: false, // Search API doesn't expose draft status reliably
60                            labels: issue.labels.iter().map(|l| l.name.clone()).collect(),
61                            user_has_reviewed: false, // Will be populated by enrichment
62                            filtered_size: None, // Will be set by enrich_pr if exclude patterns configured
63                        }
64                    })
65                    .collect();
66                return Ok(prs);
67            }
68            Err(e) => {
69                let error_str = format!("{:?}", e);
70
71                // Auth errors: fail immediately with typed AuthError (no retry)
72                if error_str.contains("401") || error_str.contains("Bad credentials") {
73                    return Err(crate::fetch::AuthError {
74                        message:
75                            "Authentication failed. Your GitHub token may be invalid or expired."
76                                .to_string(),
77                    }
78                    .into());
79                }
80
81                // Rate limit: fail immediately (caller handles differently)
82                if error_str.contains("rate limit") || error_str.contains("403") {
83                    return Err(anyhow!(
84                        "GitHub API rate limit exceeded. Wait a few minutes and try again."
85                    ));
86                }
87
88                // Permission errors: fail immediately
89                if error_str.contains("do not have permission")
90                    || error_str.contains("resources do not exist")
91                {
92                    return Err(anyhow!("Repository not found or no access. Check repo name and token permissions (needs 'repo' scope for private repos)."));
93                }
94
95                // Transient errors: retry with backoff
96                if attempt >= max_retries {
97                    return Err(anyhow!(
98                        "GitHub API error after {} attempts: {}",
99                        max_retries,
100                        e
101                    ));
102                }
103
104                let delay = std::time::Duration::from_millis(100 * (1 << (attempt - 1))); // 100ms, 200ms, 400ms
105                tokio::time::sleep(delay).await;
106            }
107        }
108    }
109}
110
111/// Fetch PR details (additions, deletions) from the GitHub API
112async fn fetch_pr_details(
113    client: &Octocrab,
114    owner: &str,
115    repo: &str,
116    number: u64,
117) -> Result<(u64, u64)> {
118    let pr = client
119        .pulls(owner, repo)
120        .get(number)
121        .await
122        .context("Failed to fetch PR details")?;
123
124    let additions = pr.additions.unwrap_or(0);
125    let deletions = pr.deletions.unwrap_or(0);
126
127    Ok((additions, deletions))
128}
129
130/// Fetch PR review count (approved reviews) and check if authenticated user has reviewed
131async fn fetch_pr_reviews(
132    client: &Octocrab,
133    owner: &str,
134    repo: &str,
135    number: u64,
136    auth_username: Option<&str>,
137) -> Result<(u32, bool)> {
138    let reviews = client
139        .pulls(owner, repo)
140        .list_reviews(number)
141        .send()
142        .await
143        .context("Failed to fetch PR reviews")?;
144
145    let approved_count = reviews
146        .items
147        .iter()
148        .filter(|review| {
149            matches!(
150                review.state,
151                Some(octocrab::models::pulls::ReviewState::Approved)
152            )
153        })
154        .count() as u32;
155
156    // Check if authenticated user has reviewed (any review state counts)
157    let user_has_reviewed = auth_username.is_some_and(|username| {
158        reviews.items.iter().any(|r| {
159            r.user
160                .as_ref()
161                .is_some_and(|u| u.login.eq_ignore_ascii_case(username))
162        })
163    });
164
165    Ok((approved_count, user_has_reviewed))
166}
167
168/// Fetch per-file diff data for a PR with pagination.
169/// Returns a list of (filename, additions, deletions) tuples.
170async fn fetch_pr_file_list(
171    client: &Octocrab,
172    owner: &str,
173    repo: &str,
174    number: u64,
175) -> Result<Vec<(String, u64, u64)>> {
176    let page = client
177        .pulls(owner, repo)
178        .list_files(number)
179        .await
180        .context("Failed to fetch PR file list")?;
181
182    let all_files = client
183        .all_pages(page)
184        .await
185        .context("Failed to paginate PR file list")?;
186
187    Ok(all_files
188        .into_iter()
189        .map(|f| (f.filename, f.additions, f.deletions))
190        .collect())
191}
192
193/// Filter files by basename glob matching and compute total size of non-excluded files.
194fn apply_size_exclusions(files: &[(String, u64, u64)], exclude_patterns: &[String]) -> Result<u64> {
195    let compiled: Vec<glob::Pattern> = exclude_patterns
196        .iter()
197        .map(|p| glob::Pattern::new(p).context(format!("Invalid glob pattern: {}", p)))
198        .collect::<Result<Vec<_>>>()?;
199
200    let total = files
201        .iter()
202        .filter(|(filename, _, _)| {
203            let basename = std::path::Path::new(filename)
204                .file_name()
205                .and_then(|n| n.to_str())
206                .unwrap_or(filename);
207            !compiled.iter().any(|pat| pat.matches(basename))
208        })
209        .map(|(_, additions, deletions)| additions + deletions)
210        .sum();
211
212    Ok(total)
213}
214
215/// Enrich a PR with detailed information (size and approvals)
216async fn enrich_pr(
217    client: &Octocrab,
218    pr: &mut PullRequest,
219    auth_username: Option<&str>,
220    exclude_patterns: &Option<Vec<String>>,
221) -> Result<()> {
222    // Parse owner/repo from pr.repo field
223    let parts: Vec<&str> = pr.repo.split('/').collect();
224    if parts.len() != 2 {
225        return Err(anyhow!("Invalid repo format: {}", pr.repo));
226    }
227    let owner = parts[0];
228    let repo_name = parts[1];
229
230    // Fetch details and reviews in parallel
231    let details_fut = fetch_pr_details(client, owner, repo_name, pr.number);
232    let reviews_fut = fetch_pr_reviews(client, owner, repo_name, pr.number, auth_username);
233
234    match tokio::try_join!(details_fut, reviews_fut) {
235        Ok(((additions, deletions), (approvals, user_has_reviewed))) => {
236            pr.additions = additions;
237            pr.deletions = deletions;
238            pr.approvals = approvals;
239            pr.user_has_reviewed = user_has_reviewed;
240
241            // Conditionally fetch per-file data and apply size exclusions
242            if let Some(ref patterns) = exclude_patterns {
243                if !patterns.is_empty() {
244                    match fetch_pr_file_list(client, owner, repo_name, pr.number).await {
245                        Ok(files) => {
246                            match apply_size_exclusions(&files, patterns) {
247                                Ok(filtered) => pr.filtered_size = Some(filtered),
248                                Err(e) => {
249                                    eprintln!(
250                                        "Warning: Failed to apply size exclusions for PR {}: {}",
251                                        pr.number, e
252                                    );
253                                    // Leave filtered_size as None — fallback to aggregate size
254                                }
255                            }
256                        }
257                        Err(e) => {
258                            eprintln!(
259                                "Warning: Failed to fetch file list for PR {}: {}",
260                                pr.number, e
261                            );
262                            // Leave filtered_size as None — fallback to aggregate size
263                        }
264                    }
265                }
266            }
267
268            Ok(())
269        }
270        Err(e) => {
271            // If enrichment fails, log but don't fail the whole operation
272            eprintln!("Warning: Failed to enrich PR {}: {}", pr.number, e);
273            Ok(())
274        }
275    }
276}
277
278/// Helper function for concurrent PR enrichment
279async fn enrich_pr_with_rate_limit_check(
280    client: Octocrab,
281    mut pr: PullRequest,
282    rate_limited: Arc<AtomicBool>,
283    auth_username: Option<String>,
284    exclude_patterns: Option<Vec<String>>,
285) -> PullRequest {
286    if rate_limited.load(Ordering::Relaxed) {
287        return pr; // Skip enrichment if rate limited
288    }
289
290    match enrich_pr(
291        &client,
292        &mut pr,
293        auth_username.as_deref(),
294        &exclude_patterns,
295    )
296    .await
297    {
298        Ok(_) => {}
299        Err(e) => {
300            let err_str = e.to_string();
301            if err_str.contains("rate limit") || err_str.contains("403") {
302                eprintln!("Warning: Rate limit hit during enrichment. Returning partial results.");
303                rate_limited.store(true, Ordering::Relaxed);
304            } else {
305                eprintln!("Warning: Failed to enrich PR {}: {}", pr.number, e);
306            }
307        }
308    }
309    pr
310}
311
312/// Search and enrich PRs with full details
313pub async fn search_and_enrich_prs(
314    client: &Octocrab,
315    query: &str,
316    auth_username: Option<&str>,
317    exclude_patterns: Option<Vec<String>>,
318) -> Result<Vec<PullRequest>> {
319    let prs = search_prs(client, query).await?;
320
321    // Enrich PRs with bounded concurrency
322    const MAX_CONCURRENT_ENRICHMENTS: usize = 10;
323
324    // Rate limit flag shared across concurrent tasks
325    let rate_limited = Arc::new(AtomicBool::new(false));
326
327    let mut futures = FuturesUnordered::new();
328    let mut prs_iter = prs.into_iter();
329    let mut enriched_prs = Vec::new();
330
331    // Fill initial batch
332    for _ in 0..MAX_CONCURRENT_ENRICHMENTS {
333        if let Some(pr) = prs_iter.next() {
334            futures.push(enrich_pr_with_rate_limit_check(
335                client.clone(),
336                pr,
337                rate_limited.clone(),
338                auth_username.map(|s| s.to_string()),
339                exclude_patterns.clone(),
340            ));
341        }
342    }
343
344    // Process results and feed new tasks
345    while let Some(pr) = futures.next().await {
346        enriched_prs.push(pr);
347
348        // Add next PR if not rate limited
349        if !rate_limited.load(Ordering::Relaxed) {
350            if let Some(next_pr) = prs_iter.next() {
351                futures.push(enrich_pr_with_rate_limit_check(
352                    client.clone(),
353                    next_pr,
354                    rate_limited.clone(),
355                    auth_username.map(|s| s.to_string()),
356                    exclude_patterns.clone(),
357                ));
358            }
359        }
360    }
361
362    // Add any remaining unenriched PRs (if rate limited, remaining weren't submitted)
363    enriched_prs.extend(prs_iter);
364
365    Ok(enriched_prs)
366}