Skip to main content

pr_bro/github/
search.rs

1use crate::buffered_eprintln;
2use anyhow::{anyhow, Context, Result};
3use futures::stream::{FuturesUnordered, StreamExt};
4use octocrab::Octocrab;
5use std::sync::atomic::{AtomicBool, Ordering};
6use std::sync::Arc;
7
8use crate::github::types::PullRequest;
9
10/// Search GitHub for pull requests matching the given query.
11/// Auth errors (401 / Bad credentials) fail immediately as a typed AuthError.
12/// Rate limit and permission errors also fail immediately.
13/// Transient/network errors are retried up to 3 times with exponential backoff.
14pub async fn search_prs(client: &Octocrab, query: &str) -> Result<Vec<PullRequest>> {
15    // Ensure the query only returns PRs, not issues
16    let query = if query.contains("is:pr") {
17        query.to_string()
18    } else {
19        format!("{} is:pr", query)
20    };
21
22    let max_retries = 3;
23    let mut attempt = 0;
24
25    loop {
26        attempt += 1;
27        match client
28            .search()
29            .issues_and_pull_requests(&query)
30            .send()
31            .await
32        {
33            Ok(results) => {
34                let prs: Vec<PullRequest> = results
35                    .items
36                    .into_iter()
37                    .filter(|issue| issue.pull_request.is_some()) // Only PRs, not issues
38                    .map(|issue| {
39                        // Extract owner/repo from html_url
40                        // Format: "https://github.com/owner/repo/pull/123"
41                        let path = issue.html_url.path();
42                        let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
43                        let repo = if parts.len() >= 2 {
44                            format!("{}/{}", parts[0], parts[1])
45                        } else {
46                            "unknown/unknown".to_string()
47                        };
48
49                        PullRequest {
50                            title: issue.title,
51                            number: issue.number,
52                            author: issue.user.login.clone(),
53                            repo,
54                            url: issue.html_url.to_string(),
55                            created_at: issue.created_at,
56                            updated_at: issue.updated_at,
57                            additions: 0, // Search API doesn't include these
58                            deletions: 0, // Will be populated by enrichment
59                            approvals: 0, // Requires separate API call
60                            draft: false, // Search API doesn't expose draft status reliably
61                            labels: issue.labels.iter().map(|l| l.name.clone()).collect(),
62                            user_has_reviewed: false, // Will be populated by enrichment
63                            filtered_size: None, // Will be set by enrich_pr if exclude patterns configured
64                        }
65                    })
66                    .collect();
67                return Ok(prs);
68            }
69            Err(e) => {
70                let error_str = format!("{:?}", e);
71
72                // Auth errors: fail immediately with typed AuthError (no retry)
73                if error_str.contains("401") || error_str.contains("Bad credentials") {
74                    return Err(crate::fetch::AuthError {
75                        message:
76                            "Authentication failed. Your GitHub token may be invalid or expired."
77                                .to_string(),
78                    }
79                    .into());
80                }
81
82                // Rate limit: fail immediately (caller handles differently)
83                if error_str.contains("rate limit") || error_str.contains("403") {
84                    return Err(anyhow!(
85                        "GitHub API rate limit exceeded. Wait a few minutes and try again."
86                    ));
87                }
88
89                // Permission errors: fail immediately
90                if error_str.contains("do not have permission")
91                    || error_str.contains("resources do not exist")
92                {
93                    return Err(anyhow!("Repository not found or no access. Check repo name and token permissions (needs 'repo' scope for private repos)."));
94                }
95
96                // Transient errors: retry with backoff
97                if attempt >= max_retries {
98                    return Err(anyhow!(
99                        "GitHub API error after {} attempts: {}",
100                        max_retries,
101                        e
102                    ));
103                }
104
105                let delay = std::time::Duration::from_millis(100 * (1 << (attempt - 1))); // 100ms, 200ms, 400ms
106                tokio::time::sleep(delay).await;
107            }
108        }
109    }
110}
111
112/// Fetch PR details (additions, deletions) from the GitHub API
113async fn fetch_pr_details(
114    client: &Octocrab,
115    owner: &str,
116    repo: &str,
117    number: u64,
118) -> Result<(u64, u64)> {
119    let pr = client
120        .pulls(owner, repo)
121        .get(number)
122        .await
123        .context("Failed to fetch PR details")?;
124
125    let additions = pr.additions.unwrap_or(0);
126    let deletions = pr.deletions.unwrap_or(0);
127
128    Ok((additions, deletions))
129}
130
131/// Fetch PR review count (approved reviews) and check if authenticated user has reviewed
132async fn fetch_pr_reviews(
133    client: &Octocrab,
134    owner: &str,
135    repo: &str,
136    number: u64,
137    auth_username: Option<&str>,
138) -> Result<(u32, bool)> {
139    let reviews = client
140        .pulls(owner, repo)
141        .list_reviews(number)
142        .send()
143        .await
144        .context("Failed to fetch PR reviews")?;
145
146    let approved_count = reviews
147        .items
148        .iter()
149        .filter(|review| {
150            matches!(
151                review.state,
152                Some(octocrab::models::pulls::ReviewState::Approved)
153            )
154        })
155        .count() as u32;
156
157    // Check if authenticated user has reviewed (any review state counts)
158    let user_has_reviewed = auth_username.is_some_and(|username| {
159        reviews.items.iter().any(|r| {
160            r.user
161                .as_ref()
162                .is_some_and(|u| u.login.eq_ignore_ascii_case(username))
163        })
164    });
165
166    Ok((approved_count, user_has_reviewed))
167}
168
169/// Fetch per-file diff data for a PR with pagination.
170/// Returns a list of (filename, additions, deletions) tuples.
171async fn fetch_pr_file_list(
172    client: &Octocrab,
173    owner: &str,
174    repo: &str,
175    number: u64,
176) -> Result<Vec<(String, u64, u64)>> {
177    let page = client
178        .pulls(owner, repo)
179        .list_files(number)
180        .await
181        .context("Failed to fetch PR file list")?;
182
183    let all_files = client
184        .all_pages(page)
185        .await
186        .context("Failed to paginate PR file list")?;
187
188    Ok(all_files
189        .into_iter()
190        .map(|f| (f.filename, f.additions, f.deletions))
191        .collect())
192}
193
194/// Filter files by basename glob matching and compute total size of non-excluded files.
195fn apply_size_exclusions(files: &[(String, u64, u64)], exclude_patterns: &[String]) -> Result<u64> {
196    let compiled: Vec<glob::Pattern> = exclude_patterns
197        .iter()
198        .map(|p| glob::Pattern::new(p).context(format!("Invalid glob pattern: {}", p)))
199        .collect::<Result<Vec<_>>>()?;
200
201    let total = files
202        .iter()
203        .filter(|(filename, _, _)| {
204            let basename = std::path::Path::new(filename)
205                .file_name()
206                .and_then(|n| n.to_str())
207                .unwrap_or(filename);
208            !compiled.iter().any(|pat| pat.matches(basename))
209        })
210        .map(|(_, additions, deletions)| additions + deletions)
211        .sum();
212
213    Ok(total)
214}
215
216/// Enrich a PR with detailed information (size and approvals)
217async fn enrich_pr(
218    client: &Octocrab,
219    pr: &mut PullRequest,
220    auth_username: Option<&str>,
221    exclude_patterns: &Option<Vec<String>>,
222) -> Result<()> {
223    // Parse owner/repo from pr.repo field
224    let parts: Vec<&str> = pr.repo.split('/').collect();
225    if parts.len() != 2 {
226        return Err(anyhow!("Invalid repo format: {}", pr.repo));
227    }
228    let owner = parts[0];
229    let repo_name = parts[1];
230
231    // Fetch details and reviews in parallel
232    let details_fut = fetch_pr_details(client, owner, repo_name, pr.number);
233    let reviews_fut = fetch_pr_reviews(client, owner, repo_name, pr.number, auth_username);
234
235    match tokio::try_join!(details_fut, reviews_fut) {
236        Ok(((additions, deletions), (approvals, user_has_reviewed))) => {
237            pr.additions = additions;
238            pr.deletions = deletions;
239            pr.approvals = approvals;
240            pr.user_has_reviewed = user_has_reviewed;
241
242            // Conditionally fetch per-file data and apply size exclusions
243            if let Some(ref patterns) = exclude_patterns {
244                if !patterns.is_empty() {
245                    match fetch_pr_file_list(client, owner, repo_name, pr.number).await {
246                        Ok(files) => {
247                            match apply_size_exclusions(&files, patterns) {
248                                Ok(filtered) => pr.filtered_size = Some(filtered),
249                                Err(e) => {
250                                    buffered_eprintln!(
251                                        "Warning: Failed to apply size exclusions for PR {}: {}",
252                                        pr.number,
253                                        e
254                                    );
255                                    // Leave filtered_size as None — fallback to aggregate size
256                                }
257                            }
258                        }
259                        Err(e) => {
260                            buffered_eprintln!(
261                                "Warning: Failed to fetch file list for PR {}: {}",
262                                pr.number,
263                                e
264                            );
265                            // Leave filtered_size as None — fallback to aggregate size
266                        }
267                    }
268                }
269            }
270
271            Ok(())
272        }
273        Err(e) => {
274            // If enrichment fails, log but don't fail the whole operation
275            buffered_eprintln!("Warning: Failed to enrich PR {}: {}", pr.number, e);
276            Ok(())
277        }
278    }
279}
280
281/// Helper function for concurrent PR enrichment
282async fn enrich_pr_with_rate_limit_check(
283    client: Octocrab,
284    mut pr: PullRequest,
285    rate_limited: Arc<AtomicBool>,
286    auth_username: Option<String>,
287    exclude_patterns: Option<Vec<String>>,
288) -> PullRequest {
289    if rate_limited.load(Ordering::Relaxed) {
290        return pr; // Skip enrichment if rate limited
291    }
292
293    match enrich_pr(
294        &client,
295        &mut pr,
296        auth_username.as_deref(),
297        &exclude_patterns,
298    )
299    .await
300    {
301        Ok(_) => {}
302        Err(e) => {
303            let err_str = e.to_string();
304            if err_str.contains("rate limit") || err_str.contains("403") {
305                buffered_eprintln!(
306                    "Warning: Rate limit hit during enrichment. Returning partial results."
307                );
308                rate_limited.store(true, Ordering::Relaxed);
309            } else {
310                buffered_eprintln!("Warning: Failed to enrich PR {}: {}", pr.number, e);
311            }
312        }
313    }
314    pr
315}
316
317/// Search and enrich PRs with full details
318pub async fn search_and_enrich_prs(
319    client: &Octocrab,
320    query: &str,
321    auth_username: Option<&str>,
322    exclude_patterns: Option<Vec<String>>,
323) -> Result<Vec<PullRequest>> {
324    let prs = search_prs(client, query).await?;
325
326    // Enrich PRs with bounded concurrency
327    const MAX_CONCURRENT_ENRICHMENTS: usize = 10;
328
329    // Rate limit flag shared across concurrent tasks
330    let rate_limited = Arc::new(AtomicBool::new(false));
331
332    let mut futures = FuturesUnordered::new();
333    let mut prs_iter = prs.into_iter();
334    let mut enriched_prs = Vec::new();
335
336    // Fill initial batch
337    for _ in 0..MAX_CONCURRENT_ENRICHMENTS {
338        if let Some(pr) = prs_iter.next() {
339            futures.push(enrich_pr_with_rate_limit_check(
340                client.clone(),
341                pr,
342                rate_limited.clone(),
343                auth_username.map(|s| s.to_string()),
344                exclude_patterns.clone(),
345            ));
346        }
347    }
348
349    // Process results and feed new tasks
350    while let Some(pr) = futures.next().await {
351        enriched_prs.push(pr);
352
353        // Add next PR if not rate limited
354        if !rate_limited.load(Ordering::Relaxed) {
355            if let Some(next_pr) = prs_iter.next() {
356                futures.push(enrich_pr_with_rate_limit_check(
357                    client.clone(),
358                    next_pr,
359                    rate_limited.clone(),
360                    auth_username.map(|s| s.to_string()),
361                    exclude_patterns.clone(),
362                ));
363            }
364        }
365    }
366
367    // Add any remaining unenriched PRs (if rate limited, remaining weren't submitted)
368    enriched_prs.extend(prs_iter);
369
370    Ok(enriched_prs)
371}