Skip to main content

skillfile_sources/
http.rs

1use std::process::Command;
2use std::sync::OnceLock;
3
4use skillfile_core::error::SkillfileError;
5
6// ---------------------------------------------------------------------------
7// GitHub token discovery (cached for process lifetime)
8// ---------------------------------------------------------------------------
9
/// Process-wide cache for the discovered GitHub token.
///
/// Filled on the first call to [`github_token`] with whatever
/// `discover_github_token` returns; `None` is cached too, so a failed
/// discovery is not retried for the rest of the process.
static TOKEN_CACHE: OnceLock<Option<String>> = OnceLock::new();
11
/// Token injected from the CLI config file before any command runs.
///
/// The CLI crate reads the config file and calls [`set_config_token`] once at
/// startup. This keeps the `sources` crate free of any dependency on `cli`.
///
/// Consulted by `discover_github_token` after the `GITHUB_TOKEN` / `GH_TOKEN`
/// environment variables and before falling back to the `gh` CLI.
static CONFIG_TOKEN: OnceLock<Option<String>> = OnceLock::new();
17
18/// Inject a GitHub token read from the user config file.
19///
20/// Must be called before the first use of [`github_token`]. Subsequent calls
21/// are ignored (the `OnceLock` is already set).
22pub fn set_config_token(token: Option<String>) {
23    let _ = CONFIG_TOKEN.set(token);
24}
25
26/// Opaque GitHub token handle.
27///
28/// The raw token string is **not publicly accessible**. The only way to
29/// extract it is [`GithubToken::for_url`], which gates on
30/// `is_github_url` — making it structurally impossible to leak the
31/// token to non-GitHub domains.
32pub struct GithubToken(Option<&'static str>);
33
34impl GithubToken {
35    /// Extract the token string only for GitHub domains.
36    ///
37    /// Returns `None` when the URL is not a GitHub domain or when no
38    /// token is available. This is the **only** way to obtain the raw
39    /// token value.
40    #[must_use]
41    pub fn for_url(&self, url: &str) -> Option<&'static str> {
42        is_github_url(url).then_some(self.0).flatten()
43    }
44}
45
46/// Discover a GitHub token from environment or `gh` CLI. Cached after first call.
47///
48/// Returns an opaque [`GithubToken`] — the raw value can only be
49/// extracted via [`GithubToken::for_url`] for GitHub domains.
50#[must_use]
51pub fn github_token() -> GithubToken {
52    GithubToken(TOKEN_CACHE.get_or_init(discover_github_token).as_deref())
53}
54
/// Read a token from the environment variable `name`.
///
/// Surrounding whitespace (including a trailing newline) is stripped, in line
/// with how `gh_cli_token` treats the `gh` output. Returns `None` when the
/// variable is unset, is not valid Unicode, or is empty after trimming.
fn env_token(name: &str) -> Option<String> {
    let raw = std::env::var(name).ok()?;
    let token = raw.trim();
    // A blank value would otherwise be sent as `Authorization: Bearer `.
    (!token.is_empty()).then(|| token.to_owned())
}
58
/// Ask the `gh` CLI for its stored token by running `gh auth token`.
///
/// Returns `None` when the command cannot be started, exits unsuccessfully,
/// or prints nothing but whitespace. Otherwise returns stdout (lossily decoded
/// as UTF-8) with surrounding whitespace removed.
fn gh_cli_token() -> Option<String> {
    let finished = match Command::new("gh").arg("auth").arg("token").output() {
        Ok(out) if out.status.success() => out,
        _ => return None,
    };
    let stdout = String::from_utf8_lossy(&finished.stdout);
    let trimmed = stdout.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
67
68fn discover_github_token() -> Option<String> {
69    if let Some(token) = env_token("GITHUB_TOKEN") {
70        return Some(token);
71    }
72    if let Some(token) = env_token("GH_TOKEN") {
73        return Some(token);
74    }
75    // Config-file token injected by the CLI crate before commands run.
76    if let Some(Some(token)) = CONFIG_TOKEN.get() {
77        if !token.is_empty() {
78            return Some(token.clone());
79        }
80    }
81    gh_cli_token()
82}
83
// ---------------------------------------------------------------------------
// HttpClient trait — abstraction over HTTP GET/POST for testability
// ---------------------------------------------------------------------------
87
/// Borrowed arguments for [`HttpClient::post_json_with_bearer`].
///
/// Bundles the three request inputs so the trait method takes one parameter.
pub struct BearerPost<'a> {
    /// Target URL of the POST request.
    pub url: &'a str,
    /// Request body; [`UreqClient`] sends it with
    /// `Content-Type: application/json`.
    pub body: &'a str,
    /// Credential that [`UreqClient`] sends as `Authorization: Bearer <token>`.
    /// The default trait implementation ignores it.
    pub token: &'a str,
}
93
/// Contract for the HTTP requests (GET and POST) used by the fetcher/resolver
/// layer.
///
/// Implementations are responsible for:
/// - Setting standard headers (User-Agent, Authorization)
/// - Connection pooling / agent reuse
/// - Error mapping to [`SkillfileError`]
///
/// The trait has three required methods and one provided method, covering the
/// HTTP patterns in this codebase:
/// - `get_bytes`: raw file downloads (content from `raw.githubusercontent.com`)
/// - `get_json`: GitHub API calls where a "not found" answer is not fatal
/// - `post_json`: POST with JSON body (used by some registry APIs)
/// - `post_json_with_bearer`: POST with a caller-supplied bearer token
///   (provided; defaults to `post_json`)
pub trait HttpClient: Send + Sync {
    /// GET a URL and return the response body as raw bytes.
    ///
    /// Returns `Err(SkillfileError::Network)` on HTTP errors (including 404).
    fn get_bytes(&self, url: &str) -> Result<Vec<u8>, SkillfileError>;

    /// GET a URL with `Accept: application/vnd.github.v3+json` header.
    ///
    /// Returns `Ok(Some(body))` on success and `Ok(None)` when the server
    /// reports that the resource does not exist (used for tentative lookups
    /// like SHA resolution where a missing ref is not fatal). [`UreqClient`]
    /// treats exactly 404 and 422 this way.
    ///
    /// Returns `Err` for everything else: other HTTP statuses (including
    /// 401 and 403) and network/server errors.
    fn get_json(&self, url: &str) -> Result<Option<String>, SkillfileError>;

    /// POST a JSON body to a URL and return the response body as bytes.
    ///
    /// Returns `Err(SkillfileError::Network)` on HTTP or network errors.
    fn post_json(&self, url: &str, body: &str) -> Result<Vec<u8>, SkillfileError>;

    /// POST with a custom `Authorization: Bearer` header (for non-GitHub APIs).
    ///
    /// Default: ignores the token and delegates to [`post_json`](Self::post_json).
    /// Test mocks use this default; [`UreqClient`] overrides to send the header.
    ///
    /// # Note
    /// The `token` field of [`BearerPost`] is required by non-GitHub registry
    /// APIs (e.g. skillhub.club).
    fn post_json_with_bearer(&self, req: &BearerPost<'_>) -> Result<Vec<u8>, SkillfileError> {
        self.post_json(req.url, req.body)
    }
}
133
134// ---------------------------------------------------------------------------
135// GitHub URL allowlist — tokens must never leave GitHub domains
136// ---------------------------------------------------------------------------
137
/// Returns `true` if `url` targets a GitHub domain that should receive the
/// GitHub `Authorization` header.
///
/// The host is taken to be the text between the `https://` / `http://` prefix
/// and the first `/`, and must equal an allowlisted name exactly — look-alikes
/// such as `api.github.com.evil.com` are rejected, as is any URL without one
/// of those two prefixes.
fn is_github_url(url: &str) -> bool {
    const GITHUB_HOSTS: [&str; 2] = ["api.github.com", "raw.githubusercontent.com"];

    // Both schemes are accepted because this is purely a host check. A `true`
    // result says nothing about transport security: an `http://` URL is not
    // upgraded to TLS by the HTTP client.
    let after_scheme = match url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
    {
        Some(rest) => rest,
        None => return false,
    };
    let host = after_scheme.split('/').next().unwrap_or("");
    GITHUB_HOSTS.iter().any(|allowed| *allowed == host)
}
154
155// ---------------------------------------------------------------------------
156// UreqClient — the production implementation backed by ureq
157// ---------------------------------------------------------------------------
158
159fn read_response_text(body: &mut ureq::Body, url: &str) -> Result<String, SkillfileError> {
160    body.read_to_string()
161        .map_err(|e| SkillfileError::Network(format!("failed to read response from {url}: {e}")))
162}
163
/// Production HTTP client backed by `ureq::Agent`.
///
/// Attaches `User-Agent` to every request. GitHub `Authorization` header
/// is only sent to GitHub domains (`api.github.com`,
/// `raw.githubusercontent.com`) — never to third-party registries.
pub struct UreqClient {
    /// Agent through which every request of this client is issued; configured
    /// in [`UreqClient::new`].
    agent: ureq::Agent,
}
172
173impl UreqClient {
174    pub fn new() -> Self {
175        let config = ureq::config::Config::builder()
176            // Preserve Authorization header on same-host HTTPS redirects.
177            // GitHub returns 301 for renamed repos (api.github.com -> api.github.com);
178            // the default (Never) strips auth, causing 401 on the redirect target.
179            .redirect_auth_headers(ureq::config::RedirectAuthHeaders::SameHost)
180            .build();
181        Self {
182            agent: ureq::Agent::new_with_config(config),
183        }
184    }
185
186    fn build_get(&self, url: &str) -> ureq::RequestBuilder<ureq::typestate::WithoutBody> {
187        let mut req = self.agent.get(url).header("User-Agent", "skillfile/1.0");
188        if let Some(token) = github_token().for_url(url) {
189            req = req.header("Authorization", &format!("Bearer {token}"));
190        }
191        req
192    }
193
194    fn build_post(&self, url: &str) -> ureq::RequestBuilder<ureq::typestate::WithBody> {
195        let mut req = self.agent.post(url).header("User-Agent", "skillfile/1.0");
196        if let Some(token) = github_token().for_url(url) {
197            req = req.header("Authorization", &format!("Bearer {token}"));
198        }
199        req
200    }
201}
202
203impl Default for UreqClient {
204    fn default() -> Self {
205        Self::new()
206    }
207}
208
209impl HttpClient for UreqClient {
210    fn get_bytes(&self, url: &str) -> Result<Vec<u8>, SkillfileError> {
211        let mut response = self.build_get(url).call().map_err(|e| match &e {
212            ureq::Error::StatusCode(404) => SkillfileError::Network(format!(
213                "HTTP 404: {url} not found — check that the path exists in the upstream repo"
214            )),
215            ureq::Error::StatusCode(code) => {
216                SkillfileError::Network(format!("HTTP {code} fetching {url}"))
217            }
218            _ => SkillfileError::Network(format!("{e} fetching {url}")),
219        })?;
220        response.body_mut().read_to_vec().map_err(|e| {
221            SkillfileError::Network(format!("failed to read response from {url}: {e}"))
222        })
223    }
224
225    fn get_json(&self, url: &str) -> Result<Option<String>, SkillfileError> {
226        let result = self
227            .build_get(url)
228            .header("Accept", "application/vnd.github.v3+json")
229            .call();
230
231        match result {
232            Ok(mut response) => read_response_text(response.body_mut(), url).map(Some),
233            // 404/422 = ref or repo doesn't exist (tentative lookup, not fatal).
234            // 403 = rate-limited or forbidden; 401 = bad token — surface these.
235            Err(ureq::Error::StatusCode(code)) if code == 404 || code == 422 => Ok(None),
236            Err(ureq::Error::StatusCode(403)) => Err(SkillfileError::Network(format!(
237                "HTTP 403 fetching {url} — you may be rate-limited. \
238                 Set GITHUB_TOKEN or run `gh auth login` to authenticate."
239            ))),
240            Err(e) => Err(SkillfileError::Network(format!("{e} fetching {url}"))),
241        }
242    }
243
244    fn post_json(&self, url: &str, body: &str) -> Result<Vec<u8>, SkillfileError> {
245        let mut response = self
246            .build_post(url)
247            .header("Content-Type", "application/json")
248            .send(body.as_bytes())
249            .map_err(|e| match &e {
250                ureq::Error::StatusCode(code) => {
251                    SkillfileError::Network(format!("HTTP {code} posting to {url}"))
252                }
253                _ => SkillfileError::Network(format!("{e} posting to {url}")),
254            })?;
255        response.body_mut().read_to_vec().map_err(|e| {
256            SkillfileError::Network(format!("failed to read response from {url}: {e}"))
257        })
258    }
259
260    fn post_json_with_bearer(&self, req: &BearerPost<'_>) -> Result<Vec<u8>, SkillfileError> {
261        let (url, token) = (req.url, req.token);
262        let mut response = self
263            .agent
264            .post(url)
265            .header("User-Agent", "skillfile/1.0")
266            .header("Content-Type", "application/json")
267            .header("Authorization", &format!("Bearer {token}"))
268            .send(req.body.as_bytes())
269            .map_err(|e| match &e {
270                ureq::Error::StatusCode(code) => {
271                    SkillfileError::Network(format!("HTTP {code} posting to {url}"))
272                }
273                _ => SkillfileError::Network(format!("{e} posting to {url}")),
274            })?;
275        response.body_mut().read_to_vec().map_err(|e| {
276            SkillfileError::Network(format!("failed to read response from {url}: {e}"))
277        })
278    }
279}
280
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: constructing the client through `Default` must not panic.
    #[test]
    fn ureq_client_default_creates_successfully() {
        drop(UreqClient::default());
    }

    /// Verify that `set_config_token` leaves `CONFIG_TOKEN` initialised.
    ///
    /// A `OnceLock` accepts a single write per process, so this only checks
    /// that the lock holds *some* value afterwards — ours, or one written by an
    /// earlier test.
    #[test]
    fn set_config_token_populates_cache() {
        let injected = Some(String::from("test-token-abc"));
        set_config_token(injected);
        assert!(
            CONFIG_TOKEN.get().is_some(),
            "CONFIG_TOKEN must be initialised"
        );
    }

    // -- GithubToken newtype tests -----------------------------------------------
    //
    // The opaque wrapper must only hand out the raw token when it is given a
    // GitHub URL.

    #[test]
    fn github_token_type_for_url_rejects_registries() {
        let handle = GithubToken(Some("ghp_secret"));
        let registries = [
            "https://agentskill.sh/api/search",
            "https://skills.sh/api/search",
            "https://www.skillhub.club/api/v1/skills/search",
        ];
        for url in registries.iter() {
            assert_eq!(handle.for_url(url), None, "token leaked to {url}");
        }
    }

    #[test]
    fn github_token_type_for_url_allows_github() {
        let handle = GithubToken(Some("ghp_secret"));
        let github_urls = [
            "https://api.github.com/repos/o/r",
            "https://raw.githubusercontent.com/o/r/HEAD/f",
        ];
        for url in github_urls.iter() {
            assert_eq!(handle.for_url(url), Some("ghp_secret"), "url: {url}");
        }
    }

    #[test]
    fn github_token_type_for_url_returns_none_without_token() {
        let empty = GithubToken(None);
        assert_eq!(empty.for_url("https://api.github.com/repos/o/r"), None);
    }

    // -- is_github_url tests (token leakage prevention) -----------------------

    #[test]
    fn github_api_url_is_github() {
        let url = "https://api.github.com/repos/owner/repo";
        assert!(is_github_url(url));
    }

    #[test]
    fn github_raw_url_is_github() {
        let url = "https://raw.githubusercontent.com/owner/repo/main/file.md";
        assert!(is_github_url(url));
    }

    #[test]
    fn github_api_root_is_github() {
        let url = "https://api.github.com/";
        assert!(is_github_url(url));
    }

    #[test]
    fn agentskill_url_is_not_github() {
        let url = "https://agentskill.sh/api/agent/search?q=test";
        assert!(!is_github_url(url));
    }

    #[test]
    fn skillssh_url_is_not_github() {
        let url = "https://skills.sh/api/search?q=test";
        assert!(!is_github_url(url));
    }

    #[test]
    fn skillhub_url_is_not_github() {
        let url = "https://www.skillhub.club/api/v1/skills/search";
        assert!(!is_github_url(url));
    }

    #[test]
    fn spoofed_github_subdomain_is_not_github() {
        let url = "https://api.github.com.evil.com/repos";
        assert!(!is_github_url(url));
    }

    #[test]
    fn spoofed_raw_subdomain_is_not_github() {
        let url = "https://raw.githubusercontent.com.evil.com/file";
        assert!(!is_github_url(url));
    }

    #[test]
    fn empty_url_is_not_github() {
        assert!(!is_github_url(""));
    }

    #[test]
    fn bare_domain_is_not_github() {
        // No scheme prefix, so no host can be extracted.
        let url = "api.github.com/repos";
        assert!(!is_github_url(url));
    }

    #[test]
    fn http_github_url_is_github() {
        let url = "http://api.github.com/repos/owner/repo";
        assert!(is_github_url(url));
    }
}