1use std::thread;
2use std::time::Duration;
3
4use anyhow::{Context, Result, bail};
5use reqwest::blocking::Client;
6use reqwest::header::{ACCEPT, AUTHORIZATION, HeaderMap, HeaderValue, RETRY_AFTER, USER_AGENT};
7use reqwest::{StatusCode, blocking::Response};
8use serde::de::DeserializeOwned;
9
10use crate::config::GitHubConfig;
11
/// Largest response body accepted from the API (10 MiB); larger bodies error out.
const MAX_BODY_SIZE: usize = 10 * 1024 * 1024;
/// Maximum number of pages followed by the pagination helpers.
const MAX_PAGES: usize = 10;
/// Total send attempts (initial try + retries) per HTTP request.
const MAX_HTTP_ATTEMPTS: usize = 3;
/// Base delay for exponential backoff between retries.
const INITIAL_RETRY_DELAY_MS: u64 = 250;
16
/// Result of a recursive tree search in a repository.
#[derive(Debug)]
pub struct TreeSearchResult {
    /// Paths of blob entries that matched the caller's filter.
    pub paths: Vec<String>,
    /// True when the API truncated the tree listing, so `paths` may be
    /// incomplete.
    pub truncated: bool,
}
24
/// Blocking GitHub API client: a `reqwest` client pre-configured with auth
/// and media-type headers, plus the scheme+host prefix for building URLs.
pub struct GitHubClient {
    // Carries the default headers (token, Accept, API version, User-Agent)
    // set up in `with_user_agent`.
    client: Client,
    // e.g. "https://api.github.com"; built from `GitHubConfig::host`.
    base_url: String,
}
29
30impl GitHubClient {
31 pub fn new(cfg: &GitHubConfig) -> Result<Self> {
32 Self::with_user_agent(cfg, "libverify-github/0.1.0")
33 }
34
35 pub fn with_user_agent(cfg: &GitHubConfig, user_agent: &str) -> Result<Self> {
36 let mut headers = HeaderMap::new();
37 headers.insert(
38 AUTHORIZATION,
39 HeaderValue::from_str(&format!("Bearer {}", cfg.token)).context("invalid token")?,
40 );
41 headers.insert(
42 ACCEPT,
43 HeaderValue::from_static("application/vnd.github.v3+json"),
44 );
45 headers.insert(
46 "X-GitHub-Api-Version",
47 HeaderValue::from_static("2022-11-28"),
48 );
49 headers.insert(
50 USER_AGENT,
51 HeaderValue::from_str(user_agent).context("invalid User-Agent")?,
52 );
53
54 let client = Client::builder()
55 .default_headers(headers)
56 .build()
57 .context("failed to create HTTP client")?;
58
59 Ok(Self {
60 client,
61 base_url: format!("https://{}", cfg.host),
62 })
63 }
64
65 pub fn get_file_content(
69 &self,
70 owner: &str,
71 repo: &str,
72 path: &str,
73 ref_sha: &str,
74 ) -> Result<String> {
75 let url = format!(
76 "{}/repos/{owner}/{repo}/contents/{path}?ref={ref_sha}",
77 self.base_url
78 );
79 let resp = self
80 .client
81 .get(&url)
82 .header("Accept", "application/vnd.github.raw+json")
83 .send()
84 .context("failed to fetch file content")?;
85
86 if !resp.status().is_success() {
87 bail!(
88 "failed to fetch {path}: {} {}",
89 resp.status().as_u16(),
90 resp.status().canonical_reason().unwrap_or("Unknown"),
91 );
92 }
93
94 resp.text().context("failed to read file content")
95 }
96
97 pub fn find_files_in_tree(
103 &self,
104 owner: &str,
105 repo: &str,
106 ref_sha: &str,
107 filter: impl Fn(&str) -> bool,
108 ) -> Result<TreeSearchResult> {
109 let path = format!("/repos/{owner}/{repo}/git/trees/{ref_sha}?recursive=1");
110 let body = self.get(&path)?;
111 let tree: serde_json::Value = serde_json::from_str(&body)?;
112
113 let truncated = tree
114 .get("truncated")
115 .and_then(|t| t.as_bool())
116 .unwrap_or(false);
117
118 let paths = tree
119 .get("tree")
120 .and_then(|t| t.as_array())
121 .map(|entries| {
122 entries
123 .iter()
124 .filter_map(|entry| {
125 let path = entry.get("path")?.as_str()?;
126 let entry_type = entry.get("type")?.as_str()?;
127 if entry_type == "blob" && filter(path) {
128 Some(path.to_string())
129 } else {
130 None
131 }
132 })
133 .collect()
134 })
135 .unwrap_or_default();
136
137 Ok(TreeSearchResult { paths, truncated })
138 }
139
140 pub fn get(&self, path: &str) -> Result<String> {
142 let (body, _) = self.get_internal(path)?;
143 Ok(body)
144 }
145
146 pub fn get_with_link(&self, path: &str) -> Result<(String, Option<String>)> {
148 self.get_internal(path)
149 }
150
151 pub fn paginate<T: DeserializeOwned>(&self, initial_path: &str) -> Result<Vec<T>> {
153 let mut all_items: Vec<T> = Vec::new();
154 let mut current_path = initial_path.to_string();
155
156 for _ in 0..MAX_PAGES {
157 let (body, next_path) = self.get_with_link(¤t_path)?;
158 let items: Vec<T> =
159 serde_json::from_str(&body).context("failed to parse paginated response")?;
160 all_items.extend(items);
161
162 match next_path {
163 Some(next) => current_path = next,
164 None => break,
165 }
166 }
167
168 Ok(all_items)
169 }
170
171 pub fn paginate_search<T: DeserializeOwned>(&self, initial_path: &str) -> Result<Vec<T>> {
173 use crate::types::SearchResponse;
174
175 let mut all_items: Vec<T> = Vec::new();
176 let mut current_path = initial_path.to_string();
177
178 for _ in 0..MAX_PAGES {
179 let (body, next_path) = self.get_with_link(¤t_path)?;
180 let resp: SearchResponse<T> =
181 serde_json::from_str(&body).context("failed to parse search response")?;
182 all_items.extend(resp.items);
183
184 match next_path {
185 Some(next) => current_path = next,
186 None => break,
187 }
188 }
189
190 Ok(all_items)
191 }
192
193 pub fn post_graphql(
195 &self,
196 query: &str,
197 variables: Option<&serde_json::Value>,
198 ) -> Result<String> {
199 let url = format!("{}/graphql", self.base_url);
200 let body = match variables {
201 Some(vars) => serde_json::json!({ "query": query, "variables": vars }),
202 None => serde_json::json!({ "query": query }),
203 };
204
205 for attempt in 0..MAX_HTTP_ATTEMPTS {
206 match self.client.post(&url).json(&body).send() {
207 Ok(resp) => {
208 let status = resp.status();
209 let retry_after_secs = parse_retry_after_secs(resp.headers().get(RETRY_AFTER));
210
211 if !status.is_success() {
212 if should_retry_status(status) && attempt + 1 < MAX_HTTP_ATTEMPTS {
213 thread::sleep(retry_delay_for(attempt, retry_after_secs));
214 continue;
215 }
216 bail!(
217 "GitHub GraphQL error: {} {}",
218 status.as_u16(),
219 status.canonical_reason().unwrap_or("Unknown")
220 );
221 }
222
223 let text = resp.text().context("failed to read GraphQL response")?;
224 if text.len() > MAX_BODY_SIZE {
225 bail!("GraphQL response too large: {} bytes", text.len());
226 }
227 return Ok(text);
228 }
229 Err(_err) if attempt + 1 < MAX_HTTP_ATTEMPTS => {
230 thread::sleep(retry_delay_for(attempt, None));
231 }
232 Err(err) => return Err(err).context("GraphQL request failed"),
233 }
234 }
235
236 bail!("GraphQL request exhausted retry attempts")
237 }
238
239 fn get_internal(&self, path: &str) -> Result<(String, Option<String>)> {
240 let url = format!("{}{}", self.base_url, path);
241 for attempt in 0..MAX_HTTP_ATTEMPTS {
242 match self.client.get(&url).send() {
243 Ok(resp) => {
244 let status = resp.status();
245 let retry_after_secs = parse_retry_after_secs(resp.headers().get(RETRY_AFTER));
246
247 if !status.is_success() {
248 if should_retry_status(status) && attempt + 1 < MAX_HTTP_ATTEMPTS {
249 thread::sleep(retry_delay_for(attempt, retry_after_secs));
250 continue;
251 }
252
253 bail!(
254 "GitHub API error: {} {}",
255 status.as_u16(),
256 status.canonical_reason().unwrap_or("Unknown")
257 );
258 }
259
260 return parse_success_response(resp, &self.base_url);
261 }
262 Err(_err) if attempt + 1 < MAX_HTTP_ATTEMPTS => {
263 thread::sleep(retry_delay_for(attempt, None));
264 }
265 Err(err) => return Err(err).context("HTTP request failed"),
266 }
267 }
268
269 bail!("GitHub API request exhausted retry attempts")
270 }
271}
272
273fn parse_success_response(resp: Response, base_url: &str) -> Result<(String, Option<String>)> {
274 let next_url = resp
275 .headers()
276 .get("link")
277 .and_then(|v| v.to_str().ok())
278 .and_then(|link| parse_link_next(link, base_url));
279
280 let body = resp.text().context("failed to read response body")?;
281 if body.len() > MAX_BODY_SIZE {
282 bail!("response too large: {} bytes", body.len());
283 }
284
285 Ok((body, next_url))
286}
287
288fn should_retry_status(status: StatusCode) -> bool {
289 status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()
290}
291
292fn parse_retry_after_secs(value: Option<&HeaderValue>) -> Option<u64> {
293 value?.to_str().ok()?.parse::<u64>().ok()
294}
295
296fn retry_delay_for(attempt: usize, retry_after_secs: Option<u64>) -> Duration {
297 if let Some(seconds) = retry_after_secs {
298 return Duration::from_secs(seconds);
299 }
300
301 let multiplier = 1u64 << attempt.min(10);
302 Duration::from_millis(INITIAL_RETRY_DELAY_MS.saturating_mul(multiplier))
303}
304
/// Finds the `rel="next"` URL in an RFC 8288 `Link` header. When the URL
/// starts with `base_prefix`, only the path portion is returned so it can be
/// fed back into `get_with_link`; otherwise the full URL is returned.
fn parse_link_next(link_header: &str, base_prefix: &str) -> Option<String> {
    link_header
        .split(',')
        .map(str::trim)
        .find(|segment| segment.contains("rel=\"next\""))
        .and_then(|segment| {
            let start = segment.find('<')? + 1;
            let end = segment.find('>')?;
            let url = &segment[start..end];
            let path = url.strip_prefix(base_prefix).unwrap_or(url);
            Some(path.to_string())
        })
}
322
#[cfg(test)]
mod tests {
    use super::*;

    // Link-header parsing: the rel="next" URL is stripped to a path when it
    // shares the base prefix.
    #[test]
    fn link_header_with_next_rel_yields_relative_path() {
        let header = r#"<https://api.github.com/repos/o/r/pulls/1/files?page=2>; rel="next", <https://api.github.com/repos/o/r/pulls/1/files?page=5>; rel="last""#;
        assert_eq!(
            parse_link_next(header, "https://api.github.com"),
            Some("/repos/o/r/pulls/1/files?page=2".to_string())
        );
    }

    #[test]
    fn link_header_without_next_rel_yields_none() {
        let header = r#"<https://api.github.com/repos/o/r/pulls/1/files?page=5>; rel="last""#;
        assert_eq!(parse_link_next(header, "https://api.github.com"), None);
    }

    // Retry policy: 5xx and 429 are retryable; 4xx client errors are not.
    #[test]
    fn retry_policy_covers_server_errors_and_rate_limits() {
        for retryable in [
            StatusCode::INTERNAL_SERVER_ERROR,
            StatusCode::SERVICE_UNAVAILABLE,
            StatusCode::TOO_MANY_REQUESTS,
        ] {
            assert!(should_retry_status(retryable));
        }
        assert!(!should_retry_status(StatusCode::NOT_FOUND));
    }

    #[test]
    fn retry_after_header_parses_as_integer_seconds() {
        let header = HeaderValue::from_static("7");
        assert_eq!(parse_retry_after_secs(Some(&header)), Some(7));
    }

    // Backoff doubles per attempt starting from 250ms.
    #[test]
    fn backoff_grows_exponentially_without_retry_after() {
        let expected = [(0, 250), (1, 500), (2, 1000)];
        for (attempt, millis) in expected {
            assert_eq!(retry_delay_for(attempt, None), Duration::from_millis(millis));
        }
    }

    // An explicit Retry-After overrides the exponential schedule.
    #[test]
    fn backoff_defers_to_retry_after_when_present() {
        assert_eq!(retry_delay_for(2, Some(7)), Duration::from_secs(7));
    }
}
366}