//! libverify_github/client.rs — blocking GitHub REST/GraphQL client.

1use std::thread;
2use std::time::Duration;
3
4use anyhow::{Context, Result, bail};
5use reqwest::blocking::Client;
6use reqwest::header::{ACCEPT, AUTHORIZATION, HeaderMap, HeaderValue, RETRY_AFTER, USER_AGENT};
7use reqwest::{StatusCode, blocking::Response};
8use serde::de::DeserializeOwned;
9
10use crate::config::GitHubConfig;
11
/// Hard cap on any response body read into memory.
const MAX_BODY_SIZE: usize = 10 * 1024 * 1024; // 10MB
/// Maximum pages fetched by `paginate`/`paginate_search`; results beyond
/// this are silently dropped.
const MAX_PAGES: usize = 10;
/// Total attempts for one logical HTTP request (first try + retries).
const MAX_HTTP_ATTEMPTS: usize = 3;
/// Base delay for exponential backoff between retry attempts.
const INITIAL_RETRY_DELAY_MS: u64 = 250;
16
/// Result of a tree search: matched paths and whether the tree was truncated.
pub struct TreeSearchResult {
    /// Repo-relative paths of blob entries that matched the caller's filter.
    pub paths: Vec<String>,
    /// True if the GitHub API truncated the tree (>100k entries).
    /// Some matching files may be missing from `paths`.
    pub truncated: bool,
}
24
/// Blocking GitHub API client with auth defaults, bounded retries,
/// body-size limits, and Link-header pagination helpers.
pub struct GitHubClient {
    // reqwest blocking client carrying the default headers set at build time.
    client: Client,
    // "https://{cfg.host}"; prefixed onto every request path.
    base_url: String,
}
29
30impl GitHubClient {
31    pub fn new(cfg: &GitHubConfig) -> Result<Self> {
32        Self::with_user_agent(cfg, "libverify-github/0.1.0")
33    }
34
35    pub fn with_user_agent(cfg: &GitHubConfig, user_agent: &str) -> Result<Self> {
36        let mut headers = HeaderMap::new();
37        headers.insert(
38            AUTHORIZATION,
39            HeaderValue::from_str(&format!("Bearer {}", cfg.token)).context("invalid token")?,
40        );
41        headers.insert(
42            ACCEPT,
43            HeaderValue::from_static("application/vnd.github.v3+json"),
44        );
45        headers.insert(
46            "X-GitHub-Api-Version",
47            HeaderValue::from_static("2022-11-28"),
48        );
49        headers.insert(
50            USER_AGENT,
51            HeaderValue::from_str(user_agent).context("invalid User-Agent")?,
52        );
53
54        let client = Client::builder()
55            .default_headers(headers)
56            .build()
57            .context("failed to create HTTP client")?;
58
59        Ok(Self {
60            client,
61            base_url: format!("https://{}", cfg.host),
62        })
63    }
64
65    /// Fetch raw file content from a repository at a specific ref.
66    ///
67    /// Uses the GitHub raw content media type to avoid base64 encoding.
68    pub fn get_file_content(
69        &self,
70        owner: &str,
71        repo: &str,
72        path: &str,
73        ref_sha: &str,
74    ) -> Result<String> {
75        let url = format!(
76            "{}/repos/{owner}/{repo}/contents/{path}?ref={ref_sha}",
77            self.base_url
78        );
79        let resp = self
80            .client
81            .get(&url)
82            .header("Accept", "application/vnd.github.raw+json")
83            .send()
84            .context("failed to fetch file content")?;
85
86        if !resp.status().is_success() {
87            bail!(
88                "failed to fetch {path}: {} {}",
89                resp.status().as_u16(),
90                resp.status().canonical_reason().unwrap_or("Unknown"),
91            );
92        }
93
94        resp.text().context("failed to read file content")
95    }
96
97    /// List all file paths in a repository tree at a given ref using the Git Tree API.
98    ///
99    /// Returns paths matching `filter` predicate and a `truncated` flag.
100    /// When `truncated` is true, the tree exceeded GitHub's limit and some
101    /// files may be missing — callers should treat this as partial evidence.
102    pub fn find_files_in_tree(
103        &self,
104        owner: &str,
105        repo: &str,
106        ref_sha: &str,
107        filter: impl Fn(&str) -> bool,
108    ) -> Result<TreeSearchResult> {
109        let path = format!("/repos/{owner}/{repo}/git/trees/{ref_sha}?recursive=1");
110        let body = self.get(&path)?;
111        let tree: serde_json::Value = serde_json::from_str(&body)?;
112
113        let truncated = tree
114            .get("truncated")
115            .and_then(|t| t.as_bool())
116            .unwrap_or(false);
117
118        let paths = tree
119            .get("tree")
120            .and_then(|t| t.as_array())
121            .map(|entries| {
122                entries
123                    .iter()
124                    .filter_map(|entry| {
125                        let path = entry.get("path")?.as_str()?;
126                        let entry_type = entry.get("type")?.as_str()?;
127                        if entry_type == "blob" && filter(path) {
128                            Some(path.to_string())
129                        } else {
130                            None
131                        }
132                    })
133                    .collect()
134            })
135            .unwrap_or_default();
136
137        Ok(TreeSearchResult { paths, truncated })
138    }
139
140    /// GET request returning body as string.
141    pub fn get(&self, path: &str) -> Result<String> {
142        let (body, _) = self.get_internal(path)?;
143        Ok(body)
144    }
145
146    /// GET request with pagination support. Returns (body, next_url).
147    pub fn get_with_link(&self, path: &str) -> Result<(String, Option<String>)> {
148        self.get_internal(path)
149    }
150
151    /// Paginate a GitHub API endpoint, collecting all items across pages.
152    pub fn paginate<T: DeserializeOwned>(&self, initial_path: &str) -> Result<Vec<T>> {
153        let mut all_items: Vec<T> = Vec::new();
154        let mut current_path = initial_path.to_string();
155
156        for _ in 0..MAX_PAGES {
157            let (body, next_path) = self.get_with_link(&current_path)?;
158            let items: Vec<T> =
159                serde_json::from_str(&body).context("failed to parse paginated response")?;
160            all_items.extend(items);
161
162            match next_path {
163                Some(next) => current_path = next,
164                None => break,
165            }
166        }
167
168        Ok(all_items)
169    }
170
171    /// Paginate a GitHub Search API endpoint whose response wraps items in `{ items: [...] }`.
172    pub fn paginate_search<T: DeserializeOwned>(&self, initial_path: &str) -> Result<Vec<T>> {
173        use crate::types::SearchResponse;
174
175        let mut all_items: Vec<T> = Vec::new();
176        let mut current_path = initial_path.to_string();
177
178        for _ in 0..MAX_PAGES {
179            let (body, next_path) = self.get_with_link(&current_path)?;
180            let resp: SearchResponse<T> =
181                serde_json::from_str(&body).context("failed to parse search response")?;
182            all_items.extend(resp.items);
183
184            match next_path {
185                Some(next) => current_path = next,
186                None => break,
187            }
188        }
189
190        Ok(all_items)
191    }
192
193    /// POST a GraphQL query and return the response body.
194    pub fn post_graphql(
195        &self,
196        query: &str,
197        variables: Option<&serde_json::Value>,
198    ) -> Result<String> {
199        let url = format!("{}/graphql", self.base_url);
200        let body = match variables {
201            Some(vars) => serde_json::json!({ "query": query, "variables": vars }),
202            None => serde_json::json!({ "query": query }),
203        };
204
205        for attempt in 0..MAX_HTTP_ATTEMPTS {
206            match self.client.post(&url).json(&body).send() {
207                Ok(resp) => {
208                    let status = resp.status();
209                    let retry_after_secs = parse_retry_after_secs(resp.headers().get(RETRY_AFTER));
210
211                    if !status.is_success() {
212                        if should_retry_status(status) && attempt + 1 < MAX_HTTP_ATTEMPTS {
213                            thread::sleep(retry_delay_for(attempt, retry_after_secs));
214                            continue;
215                        }
216                        bail!(
217                            "GitHub GraphQL error: {} {}",
218                            status.as_u16(),
219                            status.canonical_reason().unwrap_or("Unknown")
220                        );
221                    }
222
223                    let text = resp.text().context("failed to read GraphQL response")?;
224                    if text.len() > MAX_BODY_SIZE {
225                        bail!("GraphQL response too large: {} bytes", text.len());
226                    }
227                    return Ok(text);
228                }
229                Err(_err) if attempt + 1 < MAX_HTTP_ATTEMPTS => {
230                    thread::sleep(retry_delay_for(attempt, None));
231                }
232                Err(err) => return Err(err).context("GraphQL request failed"),
233            }
234        }
235
236        bail!("GraphQL request exhausted retry attempts")
237    }
238
239    fn get_internal(&self, path: &str) -> Result<(String, Option<String>)> {
240        let url = format!("{}{}", self.base_url, path);
241        for attempt in 0..MAX_HTTP_ATTEMPTS {
242            match self.client.get(&url).send() {
243                Ok(resp) => {
244                    let status = resp.status();
245                    let retry_after_secs = parse_retry_after_secs(resp.headers().get(RETRY_AFTER));
246
247                    if !status.is_success() {
248                        if should_retry_status(status) && attempt + 1 < MAX_HTTP_ATTEMPTS {
249                            thread::sleep(retry_delay_for(attempt, retry_after_secs));
250                            continue;
251                        }
252
253                        bail!(
254                            "GitHub API error: {} {}",
255                            status.as_u16(),
256                            status.canonical_reason().unwrap_or("Unknown")
257                        );
258                    }
259
260                    return parse_success_response(resp, &self.base_url);
261                }
262                Err(_err) if attempt + 1 < MAX_HTTP_ATTEMPTS => {
263                    thread::sleep(retry_delay_for(attempt, None));
264                }
265                Err(err) => return Err(err).context("HTTP request failed"),
266            }
267        }
268
269        bail!("GitHub API request exhausted retry attempts")
270    }
271}
272
273fn parse_success_response(resp: Response, base_url: &str) -> Result<(String, Option<String>)> {
274    let next_url = resp
275        .headers()
276        .get("link")
277        .and_then(|v| v.to_str().ok())
278        .and_then(|link| parse_link_next(link, base_url));
279
280    let body = resp.text().context("failed to read response body")?;
281    if body.len() > MAX_BODY_SIZE {
282        bail!("response too large: {} bytes", body.len());
283    }
284
285    Ok((body, next_url))
286}
287
288fn should_retry_status(status: StatusCode) -> bool {
289    status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()
290}
291
292fn parse_retry_after_secs(value: Option<&HeaderValue>) -> Option<u64> {
293    value?.to_str().ok()?.parse::<u64>().ok()
294}
295
296fn retry_delay_for(attempt: usize, retry_after_secs: Option<u64>) -> Duration {
297    if let Some(seconds) = retry_after_secs {
298        return Duration::from_secs(seconds);
299    }
300
301    let multiplier = 1u64 << attempt.min(10);
302    Duration::from_millis(INITIAL_RETRY_DELAY_MS.saturating_mul(multiplier))
303}
304
/// Extract the path for rel="next" from a Link header.
///
/// Returns the URL with `base_prefix` stripped when it matches, the full
/// URL otherwise, or `None` when no rel="next" part is present or the
/// part is malformed.
///
/// Note: a rel="next" URL containing a literal comma would be split
/// incorrectly; GitHub's pagination URLs do not contain commas.
fn parse_link_next(link_header: &str, base_prefix: &str) -> Option<String> {
    for part in link_header.split(',') {
        let part = part.trim();
        if !part.contains("rel=\"next\"") {
            continue;
        }
        // `split_once` cannot produce an inverted slice range, unlike the
        // previous independent find('<')/find('>') indexing, which panicked
        // on malformed input where '>' preceded '<'.
        let (_, after_lt) = part.split_once('<')?;
        let (url, _) = after_lt.split_once('>')?;
        let path = url.strip_prefix(base_prefix).unwrap_or(url);
        return Some(path.to_string());
    }
    None
}
322
#[cfg(test)]
mod tests {
    use super::*;

    // Link-header parsing: the rel="next" URL is found and the base prefix
    // is stripped, leaving only the request path.
    #[test]
    fn parse_link_next_extracts_path() {
        let header = r#"<https://api.github.com/repos/o/r/pulls/1/files?page=2>; rel="next", <https://api.github.com/repos/o/r/pulls/1/files?page=5>; rel="last""#;
        assert_eq!(
            parse_link_next(header, "https://api.github.com").as_deref(),
            Some("/repos/o/r/pulls/1/files?page=2")
        );
    }

    // A header with only rel="last" yields no next page.
    #[test]
    fn parse_link_next_returns_none_without_next() {
        let header = r#"<https://api.github.com/repos/o/r/pulls/1/files?page=5>; rel="last""#;
        assert_eq!(parse_link_next(header, "https://api.github.com"), None);
    }

    // Retry policy: 5xx and 429 are retryable; plain client errors are not.
    #[test]
    fn should_retry_server_errors_and_rate_limits() {
        let retryable = [
            StatusCode::INTERNAL_SERVER_ERROR,
            StatusCode::SERVICE_UNAVAILABLE,
            StatusCode::TOO_MANY_REQUESTS,
        ];
        for status in retryable {
            assert!(should_retry_status(status));
        }
        assert!(!should_retry_status(StatusCode::NOT_FOUND));
    }

    #[test]
    fn parse_retry_after_secs_reads_integer_seconds() {
        let header = HeaderValue::from_static("7");
        assert_eq!(parse_retry_after_secs(Some(&header)), Some(7));
    }

    // Backoff doubles per attempt starting from the initial delay.
    #[test]
    fn retry_delay_for_uses_exponential_backoff() {
        for (attempt, expected_ms) in [(0, 250), (1, 500), (2, 1000)] {
            assert_eq!(
                retry_delay_for(attempt, None),
                Duration::from_millis(expected_ms)
            );
        }
    }

    // A server-provided Retry-After takes precedence over backoff.
    #[test]
    fn retry_delay_for_prefers_retry_after() {
        assert_eq!(retry_delay_for(2, Some(7)), Duration::from_secs(7));
    }
}
366}