Skip to main content

pr_bro/github/
search.rs

1use crate::buffered_eprintln;
2use anyhow::{anyhow, Context, Result};
3use futures::stream::{FuturesUnordered, StreamExt};
4use octocrab::Octocrab;
5use std::sync::atomic::{AtomicBool, Ordering};
6use std::sync::Arc;
7
8use crate::github::types::PullRequest;
9
10/// Search GitHub for pull requests matching the given query.
11/// Auth errors (401 / Bad credentials) fail immediately as a typed AuthError.
12/// Rate limit and permission errors also fail immediately.
13/// Transient/network errors are retried up to 3 times with exponential backoff.
14pub async fn search_prs(client: &Octocrab, query: &str) -> Result<Vec<PullRequest>> {
15    // Ensure the query only returns PRs, not issues
16    let query = if query.contains("is:pr") {
17        query.to_string()
18    } else {
19        format!("{} is:pr", query)
20    };
21
22    let max_retries = 3;
23    let mut attempt = 0;
24
25    loop {
26        attempt += 1;
27        match client
28            .search()
29            .issues_and_pull_requests(&query)
30            .send()
31            .await
32        {
33            Ok(results) => {
34                let prs: Vec<PullRequest> = results
35                    .items
36                    .into_iter()
37                    .filter(|issue| issue.pull_request.is_some()) // Only PRs, not issues
38                    .map(|issue| {
39                        // Extract owner/repo from html_url
40                        // Format: "https://github.com/owner/repo/pull/123"
41                        let path = issue.html_url.path();
42                        let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
43                        let repo = if parts.len() >= 2 {
44                            format!("{}/{}", parts[0], parts[1])
45                        } else {
46                            "unknown/unknown".to_string()
47                        };
48
49                        PullRequest {
50                            title: issue.title,
51                            number: issue.number,
52                            author: issue.user.login.clone(),
53                            repo,
54                            url: issue.html_url.to_string(),
55                            created_at: issue.created_at,
56                            updated_at: issue.updated_at,
57                            additions: 0, // Search API doesn't include these
58                            deletions: 0, // Will be populated by enrichment
59                            approvals: 0, // Requires separate API call
60                            draft: false, // Populated during enrichment from Pulls API
61                            labels: issue.labels.iter().map(|l| l.name.clone()).collect(),
62                            user_has_reviewed: false, // Will be populated by enrichment
63                            filtered_size: None, // Will be set by enrich_pr if exclude patterns configured
64                        }
65                    })
66                    .collect();
67                return Ok(prs);
68            }
69            Err(e) => {
70                let error_str = format!("{:?}", e);
71
72                // Auth errors: fail immediately with typed AuthError (no retry)
73                if error_str.contains("401") || error_str.contains("Bad credentials") {
74                    return Err(crate::fetch::AuthError {
75                        message:
76                            "Authentication failed. Your GitHub token may be invalid or expired."
77                                .to_string(),
78                    }
79                    .into());
80                }
81
82                // Rate limit: fail immediately (caller handles differently)
83                if error_str.contains("rate limit") || error_str.contains("403") {
84                    return Err(anyhow!(
85                        "GitHub API rate limit exceeded. Wait a few minutes and try again."
86                    ));
87                }
88
89                // Permission errors: fail immediately
90                if error_str.contains("do not have permission")
91                    || error_str.contains("resources do not exist")
92                {
93                    return Err(anyhow!("Repository not found or no access. Check repo name and token permissions (needs 'repo' scope for private repos)."));
94                }
95
96                // Transient errors: retry with backoff
97                if attempt >= max_retries {
98                    return Err(anyhow!(
99                        "GitHub API error after {} attempts: {}",
100                        max_retries,
101                        e
102                    ));
103                }
104
105                let delay = std::time::Duration::from_millis(100 * (1 << (attempt - 1))); // 100ms, 200ms, 400ms
106                tokio::time::sleep(delay).await;
107            }
108        }
109    }
110}
111
112/// Fetch PR details (additions, deletions) from the GitHub API
113async fn fetch_pr_details(
114    client: &Octocrab,
115    owner: &str,
116    repo: &str,
117    number: u64,
118) -> Result<(u64, u64, bool)> {
119    let pr = client
120        .pulls(owner, repo)
121        .get(number)
122        .await
123        .context("Failed to fetch PR details")?;
124
125    let additions = pr.additions.unwrap_or(0);
126    let deletions = pr.deletions.unwrap_or(0);
127    let draft = pr.draft.unwrap_or(false);
128
129    Ok((additions, deletions, draft))
130}
131
132/// Fetch PR review count (approved reviews) and check if authenticated user has reviewed
133async fn fetch_pr_reviews(
134    client: &Octocrab,
135    owner: &str,
136    repo: &str,
137    number: u64,
138    auth_username: Option<&str>,
139) -> Result<(u32, bool)> {
140    let reviews = client
141        .pulls(owner, repo)
142        .list_reviews(number)
143        .send()
144        .await
145        .context("Failed to fetch PR reviews")?;
146
147    let approved_count = reviews
148        .items
149        .iter()
150        .filter(|review| {
151            matches!(
152                review.state,
153                Some(octocrab::models::pulls::ReviewState::Approved)
154            )
155        })
156        .count() as u32;
157
158    // Check if authenticated user has reviewed (any review state counts)
159    let user_has_reviewed = auth_username.is_some_and(|username| {
160        reviews.items.iter().any(|r| {
161            r.user
162                .as_ref()
163                .is_some_and(|u| u.login.eq_ignore_ascii_case(username))
164        })
165    });
166
167    Ok((approved_count, user_has_reviewed))
168}
169
170/// Fetch per-file diff data for a PR with pagination.
171/// Returns a list of (filename, additions, deletions) tuples.
172async fn fetch_pr_file_list(
173    client: &Octocrab,
174    owner: &str,
175    repo: &str,
176    number: u64,
177) -> Result<Vec<(String, u64, u64)>> {
178    let page = client
179        .pulls(owner, repo)
180        .list_files(number)
181        .await
182        .context("Failed to fetch PR file list")?;
183
184    let all_files = client
185        .all_pages(page)
186        .await
187        .context("Failed to paginate PR file list")?;
188
189    Ok(all_files
190        .into_iter()
191        .map(|f| (f.filename, f.additions, f.deletions))
192        .collect())
193}
194
195/// Filter files by basename glob matching and compute total size of non-excluded files.
196fn apply_size_exclusions(files: &[(String, u64, u64)], exclude_patterns: &[String]) -> Result<u64> {
197    let compiled: Vec<glob::Pattern> = exclude_patterns
198        .iter()
199        .map(|p| glob::Pattern::new(p).context(format!("Invalid glob pattern: {}", p)))
200        .collect::<Result<Vec<_>>>()?;
201
202    let total = files
203        .iter()
204        .filter(|(filename, _, _)| {
205            let basename = std::path::Path::new(filename)
206                .file_name()
207                .and_then(|n| n.to_str())
208                .unwrap_or(filename);
209            !compiled.iter().any(|pat| pat.matches(basename))
210        })
211        .map(|(_, additions, deletions)| additions + deletions)
212        .sum();
213
214    Ok(total)
215}
216
217/// Enrich a PR with detailed information (size and approvals)
218async fn enrich_pr(
219    client: &Octocrab,
220    pr: &mut PullRequest,
221    auth_username: Option<&str>,
222    exclude_patterns: &Option<Vec<String>>,
223) -> Result<()> {
224    // Parse owner/repo from pr.repo field
225    let parts: Vec<&str> = pr.repo.split('/').collect();
226    if parts.len() != 2 {
227        return Err(anyhow!("Invalid repo format: {}", pr.repo));
228    }
229    let owner = parts[0];
230    let repo_name = parts[1];
231
232    // Fetch details and reviews in parallel
233    let details_fut = fetch_pr_details(client, owner, repo_name, pr.number);
234    let reviews_fut = fetch_pr_reviews(client, owner, repo_name, pr.number, auth_username);
235
236    match tokio::try_join!(details_fut, reviews_fut) {
237        Ok(((additions, deletions, draft), (approvals, user_has_reviewed))) => {
238            pr.additions = additions;
239            pr.deletions = deletions;
240            pr.draft = draft;
241            pr.approvals = approvals;
242            pr.user_has_reviewed = user_has_reviewed;
243
244            // Conditionally fetch per-file data and apply size exclusions
245            if let Some(ref patterns) = exclude_patterns {
246                if !patterns.is_empty() {
247                    match fetch_pr_file_list(client, owner, repo_name, pr.number).await {
248                        Ok(files) => {
249                            match apply_size_exclusions(&files, patterns) {
250                                Ok(filtered) => pr.filtered_size = Some(filtered),
251                                Err(e) => {
252                                    buffered_eprintln!(
253                                        "Warning: Failed to apply size exclusions for PR {}: {}",
254                                        pr.number,
255                                        e
256                                    );
257                                    // Leave filtered_size as None — fallback to aggregate size
258                                }
259                            }
260                        }
261                        Err(e) => {
262                            buffered_eprintln!(
263                                "Warning: Failed to fetch file list for PR {}: {}",
264                                pr.number,
265                                e
266                            );
267                            // Leave filtered_size as None — fallback to aggregate size
268                        }
269                    }
270                }
271            }
272
273            Ok(())
274        }
275        Err(e) => {
276            // If enrichment fails, log but don't fail the whole operation
277            buffered_eprintln!("Warning: Failed to enrich PR {}: {}", pr.number, e);
278            Ok(())
279        }
280    }
281}
282
283/// Helper function for concurrent PR enrichment
284async fn enrich_pr_with_rate_limit_check(
285    client: Octocrab,
286    mut pr: PullRequest,
287    rate_limited: Arc<AtomicBool>,
288    auth_username: Option<String>,
289    exclude_patterns: Option<Vec<String>>,
290) -> PullRequest {
291    if rate_limited.load(Ordering::Relaxed) {
292        return pr; // Skip enrichment if rate limited
293    }
294
295    match enrich_pr(
296        &client,
297        &mut pr,
298        auth_username.as_deref(),
299        &exclude_patterns,
300    )
301    .await
302    {
303        Ok(_) => {}
304        Err(e) => {
305            let err_str = e.to_string();
306            if err_str.contains("rate limit") || err_str.contains("403") {
307                buffered_eprintln!(
308                    "Warning: Rate limit hit during enrichment. Returning partial results."
309                );
310                rate_limited.store(true, Ordering::Relaxed);
311            } else {
312                buffered_eprintln!("Warning: Failed to enrich PR {}: {}", pr.number, e);
313            }
314        }
315    }
316    pr
317}
318
319/// Search and enrich PRs with full details
320pub async fn search_and_enrich_prs(
321    client: &Octocrab,
322    query: &str,
323    auth_username: Option<&str>,
324    exclude_patterns: Option<Vec<String>>,
325) -> Result<Vec<PullRequest>> {
326    let prs = search_prs(client, query).await?;
327
328    // Enrich PRs with bounded concurrency
329    const MAX_CONCURRENT_ENRICHMENTS: usize = 10;
330
331    // Rate limit flag shared across concurrent tasks
332    let rate_limited = Arc::new(AtomicBool::new(false));
333
334    let mut futures = FuturesUnordered::new();
335    let mut prs_iter = prs.into_iter();
336    let mut enriched_prs = Vec::new();
337
338    // Fill initial batch
339    for _ in 0..MAX_CONCURRENT_ENRICHMENTS {
340        if let Some(pr) = prs_iter.next() {
341            futures.push(enrich_pr_with_rate_limit_check(
342                client.clone(),
343                pr,
344                rate_limited.clone(),
345                auth_username.map(|s| s.to_string()),
346                exclude_patterns.clone(),
347            ));
348        }
349    }
350
351    // Process results and feed new tasks
352    while let Some(pr) = futures.next().await {
353        enriched_prs.push(pr);
354
355        // Add next PR if not rate limited
356        if !rate_limited.load(Ordering::Relaxed) {
357            if let Some(next_pr) = prs_iter.next() {
358                futures.push(enrich_pr_with_rate_limit_check(
359                    client.clone(),
360                    next_pr,
361                    rate_limited.clone(),
362                    auth_username.map(|s| s.to_string()),
363                    exclude_patterns.clone(),
364                ));
365            }
366        }
367    }
368
369    // Add any remaining unenriched PRs (if rate limited, remaining weren't submitted)
370    enriched_prs.extend(prs_iter);
371
372    Ok(enriched_prs)
373}