Skip to main content

flake_edit/forge/
api.rs

1use std::collections::HashMap;
2use std::process::Command;
3use std::sync::Mutex;
4use std::sync::OnceLock;
5use std::time::Duration;
6
7use semver::Version;
8use serde::Deserialize;
9use thiserror::Error;
10use ureq::Agent;
11
12use super::version::parse_ref;
13
/// Type-erased, thread-safe error used as the `source` of the
/// [`ApiError`] variants that wrap an underlying transport failure.
type SourceError = Box<dyn std::error::Error + Send + Sync + 'static>;
15
16/// Errors from talking to a forge over HTTP.
17#[derive(Error, Debug)]
18#[non_exhaustive]
19pub enum ApiError {
20    /// Request hit a configured timeout (connect, recv-response,
21    /// or recv-body).
22    #[error("request to {url} timed out")]
23    Timeout {
24        url: String,
25        #[source]
26        source: SourceError,
27    },
28
29    /// Could not establish a connection.
30    #[error("could not reach {url}")]
31    ConnectFailed {
32        url: String,
33        #[source]
34        source: SourceError,
35    },
36
37    #[error("{url} not found (HTTP 404)")]
38    NotFound { url: String },
39
40    #[error("{url} returned HTTP {status}")]
41    HttpStatus { url: String, status: u16 },
42
43    /// Failed to parse the JSON response body returned by the forge.
44    #[error("failed to parse JSON response from {url}")]
45    Json {
46        url: String,
47        #[source]
48        source: serde_json::Error,
49    },
50
51    /// HTTP error not classified above. Reached only if ureq grows
52    /// a new variant or a bespoke connector chain returns one of the
53    /// rarer existing variants.
54    #[error("unexpected HTTP error for {url}")]
55    Other {
56        url: String,
57        #[source]
58        source: SourceError,
59    },
60
61    /// The forge returned no tags for the repository.
62    #[error("no tags found for repository")]
63    NoTagsFound,
64
65    /// Branch listing exhausted retries (both schemes, all pages) without
66    /// returning usable data.
67    #[error("no branches found for repository")]
68    NoBranchesFound,
69}
70
71/// Classify a `ureq::Error` from establishing the request into the
72/// domain `ApiError`. Hand-written rather than `#[from]` so a new
73/// ureq variant must be handled explicitly and cannot silently
74/// become `Other`.
75fn classify_ureq(err: ureq::Error, url: &str) -> ApiError {
76    let url = url.to_string();
77    match err {
78        ureq::Error::StatusCode(404) => ApiError::NotFound { url },
79        ureq::Error::StatusCode(status) => ApiError::HttpStatus { url, status },
80        ureq::Error::Timeout(_) => ApiError::Timeout {
81            url,
82            source: Box::new(err),
83        },
84        ureq::Error::HostNotFound | ureq::Error::ConnectionFailed | ureq::Error::Io(_) => {
85            ApiError::ConnectFailed {
86                url,
87                source: Box::new(err),
88            }
89        }
90        _ => ApiError::Other {
91            url,
92            source: Box::new(err),
93        },
94    }
95}
96
97/// Classify a `ureq::Error` from reading the response body off a
98/// connection that already succeeded. Distinct from `classify_ureq`
99/// because here an `Io(_)` is a mid-stream drop, not a connect
100/// failure; calling it `ConnectFailed` would tell the user we never
101/// reached the forge when in fact the peer hung up halfway through.
102fn classify_body_read(err: ureq::Error, url: &str) -> ApiError {
103    let url = url.to_string();
104    match err {
105        ureq::Error::Timeout(_) => ApiError::Timeout {
106            url,
107            source: Box::new(err),
108        },
109        _ => ApiError::Other {
110            url,
111            source: Box::new(err),
112        },
113    }
114}
115
/// Optional request headers attached to every forge request.
#[derive(Clone, Default)]
pub(crate) struct Headers {
    /// Value for the `User-Agent` header; omitted when `None`.
    pub(crate) user_agent: Option<String>,
    /// Complete value for the `Authorization` header (scheme included);
    /// omitted when `None`.
    pub(crate) authorization: Option<String>,
}
122
123impl Headers {
124    /// Headers with optional Bearer token authentication for the given domain.
125    fn for_domain(domain: &str) -> Self {
126        let authorization = get_forge_token(domain).map(|token| {
127            tracing::debug!("Found token for {}", domain);
128            format!("Bearer {token}")
129        });
130        Self {
131            user_agent: Some("flake-edit".to_string()),
132            authorization,
133        }
134    }
135}
136
/// Result of a conditional GET: either the caller's cached copy is
/// still valid, or a fresh body (with its new ETag, if any) arrived.
pub(crate) enum ConditionalResponse {
    /// The server answered 304, so the body the caller already holds
    /// for this URL remains authoritative.
    NotModified,
    /// A fresh body was received. `etag` is `None` when the response
    /// carried no ETag header, which leaves nothing to revalidate
    /// against later.
    Body { body: String, etag: Option<String> },
}
147
148/// HTTP layer: one shared `ureq::Agent` with explicit timeouts. The
149/// only direct user is [`super::cache::HttpCache`], which adds
150/// persistent ETag revalidation on top.
151pub(crate) struct HttpClient {
152    agent: Agent,
153}
154
155impl Default for HttpClient {
156    fn default() -> Self {
157        let config = ureq::Agent::config_builder()
158            .timeout_connect(Some(Duration::from_secs(10)))
159            .timeout_recv_response(Some(Duration::from_secs(30)))
160            .timeout_recv_body(Some(Duration::from_secs(30)))
161            .build();
162        Self {
163            agent: Agent::new_with_config(config),
164        }
165    }
166}
167
168impl HttpClient {
169    fn build(
170        &self,
171        url: &str,
172        headers: &Headers,
173    ) -> ureq::RequestBuilder<ureq::typestate::WithoutBody> {
174        let mut request = self.agent.get(url);
175        if let Some(ref ua) = headers.user_agent {
176            request = request.header("User-Agent", ua);
177        }
178        if let Some(ref auth) = headers.authorization {
179            request = request.header("Authorization", auth);
180        }
181        request
182    }
183
184    pub(crate) fn get(&self, url: &str, headers: &Headers) -> Result<String, ApiError> {
185        let body = self
186            .build(url, headers)
187            .call()
188            .map_err(|e| classify_ureq(e, url))?
189            .body_mut()
190            .read_to_string()
191            .map_err(|e| classify_body_read(e, url))?;
192        Ok(body)
193    }
194
195    /// Probe a URL and report whether the resource exists.
196    ///
197    /// `Ok(true)` for any 2xx response, `Ok(false)` for HTTP 404,
198    /// `Err(_)` for anything else (timeout, connect failure, 5xx,
199    /// ...). The caller must not collapse these three into a bool:
200    /// "branch does not exist" and "could not reach the forge" are
201    /// different answers and the user needs to see the latter.
202    pub(crate) fn head_status(&self, url: &str, headers: &Headers) -> Result<bool, ApiError> {
203        match self.build(url, headers).call() {
204            Ok(_) => Ok(true),
205            Err(e) => match classify_ureq(e, url) {
206                ApiError::NotFound { .. } => Ok(false),
207                other => Err(other),
208            },
209        }
210    }
211
212    /// `ureq` reports a 304 as `Error::StatusCode(304)` because it is
213    /// outside the 2xx range. We intercept that error variant
214    /// specifically so the cache layer sees a clean
215    /// `ConditionalResponse::NotModified` and the body-reading path
216    /// is reached only for real 2xx responses.
217    pub(crate) fn get_conditional(
218        &self,
219        url: &str,
220        headers: &Headers,
221        etag: Option<&str>,
222    ) -> Result<ConditionalResponse, ApiError> {
223        let mut request = self.build(url, headers);
224        if let Some(etag) = etag {
225            request = request.header("If-None-Match", etag);
226        }
227        match request.call() {
228            Ok(mut response) => {
229                let new_etag = response
230                    .headers()
231                    .get("etag")
232                    .and_then(|v| v.to_str().ok())
233                    .map(String::from);
234                let body = response
235                    .body_mut()
236                    .read_to_string()
237                    .map_err(|e| classify_body_read(e, url))?;
238                Ok(ConditionalResponse::Body {
239                    body,
240                    etag: new_etag,
241                })
242            }
243            Err(ureq::Error::StatusCode(304)) => Ok(ConditionalResponse::NotModified),
244            Err(e) => Err(classify_ureq(e, url)),
245        }
246    }
247
248    pub(crate) fn post_json(
249        &self,
250        url: &str,
251        headers: &Headers,
252        body: &str,
253    ) -> Result<String, ApiError> {
254        let mut request = self
255            .agent
256            .post(url)
257            .header("Content-Type", "application/json");
258        if let Some(ref ua) = headers.user_agent {
259            request = request.header("User-Agent", ua);
260        }
261        if let Some(ref auth) = headers.authorization {
262            request = request.header("Authorization", auth);
263        }
264        let response_body = request
265            .send(body)
266            .map_err(|e| classify_ureq(e, url))?
267            .body_mut()
268            .read_to_string()
269            .map_err(|e| classify_body_read(e, url))?;
270        Ok(response_body)
271    }
272}
273
/// Upper bound on how many pages a single paginated listing may fetch.
const MAX_PAGES: u32 = 20;
/// Page size requested from GitHub list endpoints (the maximum GitHub
/// allows).
const PER_PAGE: usize = 100;
/// Page size requested from Gitea list endpoints. Deliberately smaller
/// than [`PER_PAGE`] because self-hosted instances often enforce a
/// lower server-side cap.
const GITEA_PER_PAGE: usize = 50;
281
282/// Must stay in step with the [`IntermediaryTags`] to [`Tags`]
283/// conversion: [`ForgeClient::fetch_github_tags`] uses this
284/// predicate to decide that page 1 is trustworthy, then hands the
285/// same names to the conversion. If the conversion drops a name
286/// that the predicate accepted, the cheap path can return a
287/// [`Tags`] whose [`Tags::get_latest_tag`] is `None`.
288fn parses_as_semver(name: &str) -> bool {
289    let parsed = parse_ref(name, false);
290    Version::parse(&parsed.normalized_for_semver).is_ok()
291}
292
293/// Drive a paginated listing endpoint, accumulating items in order
294/// until exhaustion or `max_pages` bounds a misbehaving endpoint.
295///
296/// `fetch` receives a 1-based page number and returns the items on
297/// that page. Iteration stops when `fetch` yields fewer than
298/// `per_page` items (real APIs signal "no more" with a short page),
299/// or when `page` reaches `max_pages`, which exists so an endpoint
300/// that always returns full pages cannot loop indefinitely.
301fn paginated<T, F>(per_page: usize, max_pages: u32, mut fetch: F) -> Result<Vec<T>, ApiError>
302where
303    F: FnMut(u32) -> Result<Vec<T>, ApiError>,
304{
305    let mut all = Vec::new();
306    let mut page: u32 = 1;
307    loop {
308        let items = fetch(page)?;
309        let count = items.len();
310        all.extend(items);
311        if count < per_page || page >= max_pages {
312            break;
313        }
314        page += 1;
315    }
316    Ok(all)
317}
318
319/// Unified entry point for talking to forges (GitHub, Gitea/Forgejo).
320///
321/// One shared HTTP agent and per-run result caches for tags,
322/// branches, and branch-exists probes. Locking is fine-grained per
323/// cache: two threads racing on the same missing key may both
324/// fetch, at the cost of one duplicated round trip.
325pub struct ForgeClient {
326    http: super::cache::HttpCache,
327    tags_cache: Mutex<HashMap<RepoKey, Tags>>,
328    branches_cache: Mutex<HashMap<RepoKey, Branches>>,
329    branch_exists_cache: Mutex<HashMap<BranchKey, bool>>,
330    /// `false` when no github.com token is available; unauthenticated
331    /// runs skip the GraphQL batch because the endpoint rejects them
332    /// with HTTP 401, and fall back to anonymous REST.
333    github_graphql_enabled: bool,
334}
335
/// A single request inside a [`ForgeClient::batch_warm_github`] call.
///
/// Each variant stands in for round trips the REST path would make
/// per repository: `Tags` for `list_tags`, `ChannelCandidates` for a
/// series of `branch_exists` probes over a known candidate set. Both
/// fill the same caches the REST path reads.
#[derive(Debug, Clone)]
pub(crate) enum BatchLookup {
    /// Fill the tags cache for `github.com/{owner}/{repo}`.
    Tags { owner: String, repo: String },
    /// Fill the `branch_exists` cache for every entry of `candidates`,
    /// using the refs found under `refs/heads/{prefix}`. A candidate
    /// that appears in the GraphQL response is cached as `true`, one
    /// that does not is cached as `false`.
    ChannelCandidates {
        owner: String,
        repo: String,
        /// Branch-name prefix such as `"nixos-"`; it determines the
        /// `refPrefix` of the GraphQL query.
        prefix: String,
        /// Full branch names the caller will later probe through
        /// `branch_exists`. Computed by the caller so this module does
        /// not depend on the channel-version generator.
        candidates: Vec<String>,
    },
}
360
/// Cache key for per-repository results: `(domain, owner, repo)`.
type RepoKey = (String, String, String);
/// Cache key for per-branch results: `(domain, owner, repo, branch)`.
type BranchKey = (String, String, String, String);
363
364impl Default for ForgeClient {
365    fn default() -> Self {
366        Self::new()
367    }
368}
369
370impl std::fmt::Debug for ForgeClient {
371    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
372        f.debug_struct("ForgeClient").finish_non_exhaustive()
373    }
374}
375
376impl ForgeClient {
377    /// Build a client backed by the XDG-located persistent ETag cache.
378    pub fn new() -> Self {
379        Self {
380            http: super::cache::HttpCache::new(),
381            tags_cache: Mutex::new(HashMap::new()),
382            branches_cache: Mutex::new(HashMap::new()),
383            branch_exists_cache: Mutex::new(HashMap::new()),
384            github_graphql_enabled: get_forge_token("github.com").is_some(),
385        }
386    }
387
388    fn canonical_domain(domain: Option<&str>) -> String {
389        domain.unwrap_or("github.com").to_string()
390    }
391
392    /// Latest-known tags for `(owner, repo)` at `domain`. Cached on success.
393    pub fn list_tags(
394        &self,
395        owner: &str,
396        repo: &str,
397        domain: Option<&str>,
398    ) -> Result<Tags, ApiError> {
399        let key = (
400            Self::canonical_domain(domain),
401            owner.to_string(),
402            repo.to_string(),
403        );
404        if let Some(hit) = self
405            .tags_cache
406            .lock()
407            .expect("forge tags cache poisoned")
408            .get(&key)
409            .cloned()
410        {
411            return Ok(hit);
412        }
413        let fresh = if key.0 == "github.com" {
414            self.fetch_github_tags(owner, repo)?
415        } else {
416            self.fetch_gitea_tags(&key.0, owner, repo)?
417        };
418        self.tags_cache
419            .lock()
420            .expect("forge tags cache poisoned")
421            .insert(key, fresh.clone());
422        Ok(fresh)
423    }
424
425    /// All branch names for `(owner, repo)` at `domain`. Cached on success.
426    pub fn list_branches(
427        &self,
428        owner: &str,
429        repo: &str,
430        domain: Option<&str>,
431    ) -> Result<Branches, ApiError> {
432        let key = (
433            Self::canonical_domain(domain),
434            owner.to_string(),
435            repo.to_string(),
436        );
437        if let Some(hit) = self
438            .branches_cache
439            .lock()
440            .expect("forge branches cache poisoned")
441            .get(&key)
442            .cloned()
443        {
444            return Ok(hit);
445        }
446        let fresh = if key.0 == "github.com" {
447            self.fetch_github_branches(owner, repo)?
448        } else {
449            self.fetch_gitea_branches(&key.0, owner, repo)?
450        };
451        self.branches_cache
452            .lock()
453            .expect("forge branches cache poisoned")
454            .insert(key, fresh.clone());
455        Ok(fresh)
456    }
457
458    /// Single-branch existence probe.
459    ///
460    /// `Ok(true)` for an existing branch, `Ok(false)` for a forge that
461    /// returned 404, `Err(_)` for any transient failure (timeout,
462    /// DNS, 5xx, ...). Both `Ok(true)` and `Ok(false)` are cached;
463    /// errors are not.
464    pub fn branch_exists(
465        &self,
466        owner: &str,
467        repo: &str,
468        branch: &str,
469        domain: Option<&str>,
470    ) -> Result<bool, ApiError> {
471        let key = (
472            Self::canonical_domain(domain),
473            owner.to_string(),
474            repo.to_string(),
475            branch.to_string(),
476        );
477        if let Some(&hit) = self
478            .branch_exists_cache
479            .lock()
480            .expect("forge branch_exists cache poisoned")
481            .get(&key)
482        {
483            return Ok(hit);
484        }
485        let fresh = if key.0 == "github.com" {
486            self.fetch_github_branch_exists(owner, repo, branch)?
487        } else {
488            self.fetch_gitea_branch_exists(&key.0, owner, repo, branch)?
489        };
490        self.branch_exists_cache
491            .lock()
492            .expect("forge branch_exists cache poisoned")
493            .insert(key, fresh);
494        Ok(fresh)
495    }
496
497    /// Resolve many `github.com` lookups in one GraphQL POST and
498    /// prime the per-run caches with the results.
499    ///
500    /// `Tags` lookups populate `tags_cache`; `ChannelCandidates`
501    /// lookups populate `branch_exists_cache` for every candidate
502    /// (returned branches as `true`, missing ones as `false`).
503    /// Subsequent calls to [`ForgeClient::list_tags`] and
504    /// [`ForgeClient::branch_exists`] for the same `(owner, repo)`
505    /// then hit the cache instead of issuing per-repo REST round
506    /// trips.
507    ///
508    /// Per-input errors do not abort the batch. A repo that GitHub
509    /// returns as `null` (private, missing) leaves its cache slot
510    /// empty so the caller falls through to the REST path, which
511    /// surfaces the underlying error in context. The whole call
512    /// returns `Err` only when the POST itself fails.
513    pub(crate) fn batch_warm_github(&self, lookups: &[BatchLookup]) -> Result<usize, ApiError> {
514        if !self.github_graphql_enabled || lookups.is_empty() {
515            return Ok(0);
516        }
517        let headers = Headers::for_domain("github.com");
518        let (query, aliases) = build_graphql_query(lookups);
519        let payload = serde_json::json!({ "query": query }).to_string();
520        let url = "https://api.github.com/graphql";
521        tracing::debug!(
522            "Batching {} github.com lookup(s) into one GraphQL POST",
523            aliases.len()
524        );
525        let body = self.http.post_json(url, &headers, &payload)?;
526        let parsed: GraphQlResponse =
527            serde_json::from_str(&body).map_err(|source| ApiError::Json {
528                url: url.to_string(),
529                source,
530            })?;
531
532        let mut primed = 0usize;
533        for (alias, lookup) in &aliases {
534            let Some(node) = parsed.data.as_ref().and_then(|d| d.get(alias)) else {
535                continue;
536            };
537            let Some(repo) = node.as_ref() else {
538                // GraphQL returned null for this repo (private, missing,
539                // or partial-errors). Skip and let REST surface the
540                // error in context.
541                tracing::debug!("GraphQL returned null for alias {}", alias);
542                continue;
543            };
544            let names: Vec<String> = repo
545                .refs
546                .as_ref()
547                .map(|r| r.nodes.iter().map(|n| n.name.clone()).collect())
548                .unwrap_or_default();
549            match lookup {
550                BatchLookup::Tags { owner, repo: r } => {
551                    let inter = IntermediaryTags(
552                        names
553                            .into_iter()
554                            .map(|name| IntermediaryTag { name })
555                            .collect(),
556                    );
557                    let tags: Tags = inter.into();
558                    let key = ("github.com".to_string(), owner.clone(), r.clone());
559                    self.tags_cache
560                        .lock()
561                        .expect("forge tags cache poisoned")
562                        .insert(key, tags);
563                    primed += 1;
564                }
565                BatchLookup::ChannelCandidates {
566                    owner,
567                    repo: r,
568                    candidates,
569                    ..
570                } => {
571                    let returned: std::collections::HashSet<&str> =
572                        names.iter().map(|n| n.as_str()).collect();
573                    let mut cache = self
574                        .branch_exists_cache
575                        .lock()
576                        .expect("forge branch_exists cache poisoned");
577                    for candidate in candidates {
578                        let key = (
579                            "github.com".to_string(),
580                            owner.clone(),
581                            r.clone(),
582                            candidate.clone(),
583                        );
584                        cache.insert(key, returned.contains(candidate.as_str()));
585                    }
586                    primed += 1;
587                }
588            }
589        }
590        Ok(primed)
591    }
592
593    /// Fetch tags from `github.com/{owner}/{repo}`, trusting page 1
594    /// when it contains at least one parseable semver tag.
595    ///
596    /// GitHub's `/tags` orders by ref creation time, not by semver,
597    /// so the cheap path is right for monotone version progressions
598    /// and falls back to a paginated walk capped at [`MAX_PAGES`]
599    /// when page 1 is full but contains no semver-parseable names
600    /// (hash- or date-style tagging schemes). Repos that backport
601    /// onto an older branch can push the latest major off page 1;
602    /// users hitting that must pin manually.
603    fn fetch_github_tags(&self, owner: &str, repo: &str) -> Result<Tags, ApiError> {
604        let headers = Headers::for_domain("github.com");
605        let url = |page: u32| {
606            format!(
607                "https://api.github.com/repos/{owner}/{repo}/tags?per_page={PER_PAGE}&page={page}"
608            )
609        };
610
611        let first_url = url(1);
612        tracing::debug!("Fetching tags page 1: {}", first_url);
613        let body = self.http.get(&first_url, &headers)?;
614        let first: IntermediaryTags =
615            serde_json::from_str(&body).map_err(|source| ApiError::Json {
616                url: first_url.clone(),
617                source,
618            })?;
619        let mut all = first.0;
620        let first_was_full = all.len() >= PER_PAGE;
621        let first_has_semver = all.iter().any(|t| parses_as_semver(&t.name));
622
623        if !first_was_full || first_has_semver {
624            tracing::debug!(
625                "Cheap path returned {} tag(s) from page 1 (full={}, has_semver={})",
626                all.len(),
627                first_was_full,
628                first_has_semver
629            );
630            return Ok(IntermediaryTags(all).into());
631        }
632
633        tracing::debug!(
634            "Page 1 had no parseable semver in a full page; falling back to paginated walk"
635        );
636        for page in 2..=MAX_PAGES {
637            let page_url = url(page);
638            tracing::debug!("Fetching tags page {}: {}", page, page_url);
639            let body = self.http.get(&page_url, &headers)?;
640            let next: IntermediaryTags =
641                serde_json::from_str(&body).map_err(|source| ApiError::Json {
642                    url: page_url.clone(),
643                    source,
644                })?;
645            let count = next.0.len();
646            all.extend(next.0);
647            if count < PER_PAGE {
648                break;
649            }
650        }
651        tracing::debug!("Total tags fetched: {}", all.len());
652        Ok(IntermediaryTags(all).into())
653    }
654
655    fn fetch_github_branches(&self, owner: &str, repo: &str) -> Result<Branches, ApiError> {
656        let headers = Headers::for_domain("github.com");
657        let branches = paginated(PER_PAGE, MAX_PAGES, |page| {
658            let url = format!(
659                "https://api.github.com/repos/{owner}/{repo}/branches?per_page={PER_PAGE}&page={page}"
660            );
661            tracing::debug!("Fetching branches page {}: {}", page, url);
662            let body = self.http.get(&url, &headers)?;
663            let page_branches =
664                serde_json::from_str::<IntermediaryBranches>(&body).map_err(|source| {
665                    ApiError::Json {
666                        url: url.clone(),
667                        source,
668                    }
669                })?;
670            tracing::debug!("Got {} branches on page {}", page_branches.0.len(), page);
671            Ok(page_branches.0)
672        })?;
673        tracing::debug!("Total branches fetched: {}", branches.len());
674        Ok(IntermediaryBranches(branches).into())
675    }
676
677    fn fetch_github_branch_exists(
678        &self,
679        owner: &str,
680        repo: &str,
681        branch: &str,
682    ) -> Result<bool, ApiError> {
683        let headers = Headers::for_domain("github.com");
684        let url = format!("https://api.github.com/repos/{owner}/{repo}/branches/{branch}");
685        self.http.head_status(&url, &headers)
686    }
687
688    fn fetch_gitea_tags(&self, domain: &str, owner: &str, repo: &str) -> Result<Tags, ApiError> {
689        let headers = Headers::for_domain(domain);
690
691        // Try HTTPS, fall back to HTTP.
692        for scheme in ["https", "http"] {
693            let url = format!("{scheme}://{domain}/api/v1/repos/{owner}/{repo}/tags");
694            tracing::debug!("Trying Gitea tags endpoint: {}", url);
695
696            if let Ok(body) = self.http.get(&url, &headers) {
697                tracing::debug!("Body from Gitea API: {body}");
698                if let Ok(tags) = serde_json::from_str::<IntermediaryTags>(&body) {
699                    return Ok(tags.into());
700                }
701            }
702        }
703
704        Err(ApiError::NoTagsFound)
705    }
706
707    fn fetch_gitea_branches(
708        &self,
709        domain: &str,
710        owner: &str,
711        repo: &str,
712    ) -> Result<Branches, ApiError> {
713        let headers = Headers::for_domain(domain);
714        let mut all_branches = Vec::new();
715        let mut page = 1;
716
717        for scheme in ["https", "http"] {
718            loop {
719                let url = format!(
720                    "{scheme}://{domain}/api/v1/repos/{owner}/{repo}/branches?limit={GITEA_PER_PAGE}&page={page}"
721                );
722                tracing::debug!("Trying Gitea branches endpoint: {}", url);
723
724                match self.http.get(&url, &headers) {
725                    Ok(body) => {
726                        tracing::debug!("Body from Gitea API: {body}");
727                        match serde_json::from_str::<IntermediaryBranches>(&body) {
728                            Ok(page_branches) => {
729                                let count = page_branches.0.len();
730                                all_branches.extend(page_branches.0);
731
732                                if count < GITEA_PER_PAGE || page >= MAX_PAGES {
733                                    return Ok(IntermediaryBranches(all_branches).into());
734                                }
735                                page += 1;
736                            }
737                            Err(_) => break, // Try next scheme
738                        }
739                    }
740                    Err(_) => break, // Try next scheme
741                }
742            }
743
744            if !all_branches.is_empty() {
745                return Ok(IntermediaryBranches(all_branches).into());
746            }
747            page = 1; // Reset for next scheme
748        }
749
750        Err(ApiError::NoBranchesFound)
751    }
752
753    fn fetch_gitea_branch_exists(
754        &self,
755        domain: &str,
756        owner: &str,
757        repo: &str,
758        branch: &str,
759    ) -> Result<bool, ApiError> {
760        let headers = Headers::for_domain(domain);
761        // Try https, fall back to http. The first scheme to give a
762        // definitive answer (2xx or 404) wins; http is tried only
763        // when https errored out before reaching the application
764        // layer.
765        let mut last_err: Option<ApiError> = None;
766        for scheme in ["https", "http"] {
767            let url = format!("{scheme}://{domain}/api/v1/repos/{owner}/{repo}/branches/{branch}");
768            match self.http.head_status(&url, &headers) {
769                Ok(answer) => return Ok(answer),
770                Err(e) => last_err = Some(e),
771            }
772        }
773        Err(last_err.expect("at least one scheme was attempted"))
774    }
775}
776
777/// Build a single GraphQL document that resolves every `lookup`.
778///
779/// Each lookup is wrapped in an aliased `repository(owner:, name:)`
780/// query (`r0`, `r1`, ...) so the response can be demultiplexed by
781/// alias even when two lookups target the same repo. GitHub GraphQL
782/// requires unique aliases when the same field appears more than
783/// once in a selection set; the index suffix guarantees uniqueness
784/// regardless of how the caller deduplicates upstream.
785fn build_graphql_query(lookups: &[BatchLookup]) -> (String, Vec<(String, BatchLookup)>) {
786    let mut query = String::from("query {\n");
787    let mut aliases = Vec::with_capacity(lookups.len());
788    for (i, lookup) in lookups.iter().enumerate() {
789        let alias = format!("r{i}");
790        let (owner, repo) = match lookup {
791            BatchLookup::Tags { owner, repo } => (owner, repo),
792            BatchLookup::ChannelCandidates { owner, repo, .. } => (owner, repo),
793        };
794        query.push_str(&format!(
795            "  {alias}: repository(owner:{owner}, name:{repo}) {{\n",
796            owner = json_string(owner),
797            repo = json_string(repo),
798        ));
799        match lookup {
800            BatchLookup::Tags { .. } => {
801                // `orderBy: TAG_COMMIT_DATE DESC` is what makes the
802                // first 100 refs comparable to REST's
803                // `/tags?per_page=100&page=1`. Without it, GitHub
804                // returns refs in lexicographic order, which for a
805                // repo with more than 100 tags can push the
806                // highest-semver tag off the window and silently
807                // cache a stale "latest". The REST cheap path
808                // defends with a paginated fallback; the GraphQL
809                // path has no equivalent, so the ordering hint is
810                // load-bearing rather than cosmetic.
811                query.push_str(
812                    "    refs(refPrefix:\"refs/tags/\", first:100, \
813                     orderBy:{field: TAG_COMMIT_DATE, direction: DESC}) {\n",
814                );
815            }
816            BatchLookup::ChannelCandidates { prefix, .. } => {
817                // `first:100` is GitHub GraphQL's hard cap for `refs`.
818                // For the channel prefixes we care about (`nixos-`,
819                // `nixpkgs-`, `release-`, `nix-darwin-`) the universe
820                // of branches is far smaller, but the cap leaves
821                // headroom for forks that accumulate stale release
822                // branches without truncating real candidates into
823                // false-negatives.
824                query.push_str(&format!(
825                    "    refs(refPrefix:{p}, first:100) {{\n",
826                    p = json_string(&format!("refs/heads/{prefix}")),
827                ));
828            }
829        }
830        query.push_str("      nodes { name }\n");
831        query.push_str("    }\n");
832        query.push_str("  }\n");
833        aliases.push((alias, lookup.clone()));
834    }
835    query.push_str("}\n");
836    (query, aliases)
837}
838
839/// Escape `s` into a JSON-quoted string suitable for inlining as a
840/// GraphQL argument literal. `serde_json::to_string` is reused
841/// instead of hand-rolling escapes so backslash, quote, and control
842/// character handling stays correct.
843fn json_string(s: &str) -> String {
844    serde_json::Value::String(s.to_string()).to_string()
845}
846
847/// GraphQL response shape. Both lookup kinds collapse to the same
848/// `repository { refs { nodes { name } } }` projection on the wire,
849/// so one struct covers both.
850#[derive(Deserialize, Debug)]
851struct GraphQlResponse {
852    /// Absent when GitHub returned only an `errors` block (e.g. 401);
853    /// each entry can be `None` when a single repo failed inside an
854    /// otherwise-successful response.
855    data: Option<HashMap<String, Option<GraphQlRepo>>>,
856}
857
858#[derive(Deserialize, Debug)]
859struct GraphQlRepo {
860    refs: Option<GraphQlRefs>,
861}
862
863#[derive(Deserialize, Debug)]
864struct GraphQlRefs {
865    nodes: Vec<GraphQlRefName>,
866}
867
868#[derive(Deserialize, Debug)]
869struct GraphQlRefName {
870    name: String,
871}
872
873#[derive(Deserialize, Debug)]
874struct IntermediaryTags(Vec<IntermediaryTag>);
875
876#[derive(Deserialize, Debug)]
877struct IntermediaryBranches(Vec<IntermediaryBranch>);
878
879#[derive(Deserialize, Debug)]
880struct IntermediaryBranch {
881    name: String,
882}
883
/// Branch names of a repository.
///
/// `Default` yields an empty listing.
#[derive(Clone, Debug, Default)]
pub struct Branches {
    /// Names of the branches in the listing.
    pub names: Vec<String>,
}
888
889#[derive(Debug, Clone)]
890pub struct Tags {
891    versions: Vec<TagVersion>,
892}
893
894impl Tags {
895    /// Latest semver-ordered tag, or `None` for an empty / fully
896    /// unparseable set.
897    pub fn get_latest_tag(&self) -> Option<String> {
898        self.versions
899            .iter()
900            .max_by(|a, b| a.version.cmp_precedence(&b.version))
901            .map(|tag| tag.original.clone())
902    }
903}
904
905#[derive(Deserialize, Debug)]
906struct IntermediaryTag {
907    name: String,
908}
909
910#[derive(Debug, Clone)]
911struct TagVersion {
912    version: Version,
913    original: String,
914}
915
916#[derive(Deserialize, Debug, Clone)]
917struct NixConfig {
918    #[serde(rename = "access-tokens")]
919    access_tokens: Option<AccessTokens>,
920}
921
922impl NixConfig {
923    fn forge_token(&self, domain: &str) -> Option<String> {
924        self.access_tokens.as_ref()?.value.get(domain).cloned()
925    }
926}
927
928#[derive(Deserialize, Debug, Clone)]
929struct AccessTokens {
930    value: HashMap<String, String>,
931}
932
/// Process-wide cache of resolved forge tokens, keyed by domain.
///
/// The cache lives for the whole process because the inputs the
/// resolver consults (`nix.conf`, `GITHUB_TOKEN` and friends) are not
/// expected to change during a `flake-edit` run and are never
/// re-read. A negative result (`None`) is stored just like a token,
/// so a domain without one does not fork `nix config show --json`
/// again on every request.
fn token_cache() -> &'static Mutex<HashMap<String, Option<String>>> {
    type TokenMap = HashMap<String, Option<String>>;

    static CACHE: OnceLock<Mutex<TokenMap>> = OnceLock::new();
    CACHE.get_or_init(Mutex::default)
}
944
945fn get_forge_token(domain: &str) -> Option<String> {
946    {
947        let cache = token_cache().lock().expect("forge token cache poisoned");
948        if let Some(cached) = cache.get(domain) {
949            return cached.clone();
950        }
951    }
952    // Resolve outside the lock so a slow `nix` fork does not
953    // block lookups for unrelated domains. A racing duplicate
954    // resolution is harmless: `or_insert_with` keeps whichever
955    // value was inserted first, and both racers compute the same
956    // answer for the same domain.
957    let resolved = resolve_forge_token(domain);
958    let mut cache = token_cache().lock().expect("forge token cache poisoned");
959    cache
960        .entry(domain.to_string())
961        .or_insert_with(|| resolved.clone())
962        .clone()
963}
964
965/// Resolve `domain`'s forge token from scratch, with no caching.
966///
967/// Forks `nix config show --json` on every call. Callers must
968/// route through [`get_forge_token`] so repeat lookups do not
969/// re-fork.
970fn resolve_forge_token(domain: &str) -> Option<String> {
971    if let Ok(output) = Command::new("nix")
972        .arg("config")
973        .arg("show")
974        .arg("--json")
975        .output()
976        && let Ok(stdout) = String::from_utf8(output.stdout)
977        && let Ok(config) = serde_json::from_str::<NixConfig>(&stdout)
978        && let Some(token) = config.forge_token(domain)
979    {
980        return Some(token);
981    }
982
983    if let Ok(token) = std::env::var("GITEA_TOKEN") {
984        return Some(token);
985    }
986    if let Ok(token) = std::env::var("FORGEJO_TOKEN") {
987        return Some(token);
988    }
989    if domain == "github.com"
990        && let Ok(token) = std::env::var("GITHUB_TOKEN")
991    {
992        return Some(token);
993    }
994
995    // Last-resort fallback: shell out to `gh auth token --hostname
996    // <domain>`. A user who only ran `gh auth login` is anonymous to
997    // every check above (the gh CLI stores its token in its own
998    // credential file, not in nix.conf or the environment), so this
999    // single fork is what lifts that population from the 60/hr
1000    // anonymous rate limit onto a real token. `gh` exits non-zero
1001    // when it has no token for the host, which we treat as "no
1002    // token" and fall through.
1003    if let Ok(output) = Command::new("gh")
1004        .args(["auth", "token", "--hostname", domain])
1005        .output()
1006        && output.status.success()
1007        && let Ok(stdout) = String::from_utf8(output.stdout)
1008    {
1009        let token = stdout.trim();
1010        if !token.is_empty() {
1011            return Some(token.to_string());
1012        }
1013    }
1014
1015    None
1016}
1017
1018impl From<IntermediaryTags> for Tags {
1019    fn from(value: IntermediaryTags) -> Self {
1020        let mut versions = vec![];
1021        for itag in value.0 {
1022            let parsed = parse_ref(&itag.name, false);
1023            let normalized = parsed.normalized_for_semver;
1024            match Version::parse(&normalized) {
1025                Ok(semver) => {
1026                    versions.push(TagVersion {
1027                        version: semver,
1028                        original: parsed.original_ref,
1029                    });
1030                }
1031                Err(e) => {
1032                    tracing::error!("Could not parse version {:?}", e);
1033                }
1034            }
1035        }
1036        Tags { versions }
1037    }
1038}
1039
1040impl From<IntermediaryBranches> for Branches {
1041    fn from(value: IntermediaryBranches) -> Self {
1042        Branches {
1043            names: value.0.into_iter().map(|b| b.name).collect(),
1044        }
1045    }
1046}
1047
#[cfg(test)]
mod tests {
    use super::*;

    const URL: &str = "https://api.github.com/repos/foo/bar/branches/baz";

    /// Deserialize `json` as a forge tag listing, convert it to
    /// [`Tags`], and return the tag `get_latest_tag` selects. Shared
    /// by every tag-selection test so each one states only its
    /// fixture and its expectation.
    fn latest_tag_of(json: &str) -> Option<String> {
        let listing: IntermediaryTags =
            serde_json::from_str(json).expect("fixture must be a valid tag listing");
        Tags::from(listing).get_latest_tag()
    }

    #[test]
    fn paginated_accumulates_until_short_page() {
        let pages: Vec<Vec<u32>> = vec![(0..100).collect(), (100..105).collect()];
        let mut calls = 0u32;
        let result = paginated::<u32, _>(100, MAX_PAGES, |page| {
            calls += 1;
            Ok(pages[(page - 1) as usize].clone())
        })
        .unwrap();
        assert_eq!(calls, 2, "stops after the short page, no third request");
        assert_eq!(result.len(), 105);
        assert_eq!(result.first().copied(), Some(0));
        assert_eq!(result.last().copied(), Some(104));
    }

    #[test]
    fn paginated_caps_at_max_pages() {
        let mut calls = 0u32;
        let result = paginated::<u32, _>(2, 3, |_| {
            calls += 1;
            Ok(vec![1, 2])
        })
        .unwrap();
        assert_eq!(calls, 3, "safety cap halts the loop on always-full pages");
        assert_eq!(result.len(), 6);
    }

    #[test]
    fn classify_404_is_not_found() {
        let err = classify_ureq(ureq::Error::StatusCode(404), URL);
        match err {
            ApiError::NotFound { url } => assert_eq!(url, URL),
            other => panic!("expected NotFound, got {other:?}"),
        }
    }

    #[test]
    fn classify_5xx_is_http_status() {
        let err = classify_ureq(ureq::Error::StatusCode(503), URL);
        match err {
            ApiError::HttpStatus { url, status } => {
                assert_eq!(url, URL);
                assert_eq!(status, 503);
            }
            other => panic!("expected HttpStatus, got {other:?}"),
        }
    }

    #[test]
    fn body_read_io_is_not_connect_failed() {
        let io = std::io::Error::other("peer closed");
        let err = classify_body_read(ureq::Error::Io(io), URL);
        match err {
            ApiError::Other { url, .. } => assert_eq!(url, URL),
            other => panic!("expected Other, got {other:?}"),
        }
    }

    #[test]
    fn tags_parsing_with_refs_tags_prefix() {
        let latest = latest_tag_of(
            r#"[
                {"name": "refs/tags/v1.0.0"},
                {"name": "refs/tags/v2.0.0"},
                {"name": "refs/tags/v1.5.0"}
            ]"#,
        );
        assert_eq!(latest, Some("refs/tags/v2.0.0".to_string()));
    }

    #[test]
    fn tags_parsing_with_short_versions() {
        let latest = latest_tag_of(
            r#"[
                {"name": "v1"},
                {"name": "v1.1"}
            ]"#,
        );
        assert_eq!(latest, Some("v1.1".to_string()));
    }

    #[test]
    fn tags_parsing_without_prefix() {
        let latest = latest_tag_of(
            r#"[
                {"name": "1.0.0"},
                {"name": "2.0.0"},
                {"name": "1.5.0"}
            ]"#,
        );
        assert_eq!(latest, Some("2.0.0".to_string()));
    }

    #[test]
    fn tags_parsing_with_dash_prefix() {
        let latest = latest_tag_of(
            r#"[
                {"name": "release-1.0.0"},
                {"name": "release-2.0.0"},
                {"name": "release-1.5.0"}
            ]"#,
        );
        assert_eq!(latest, Some("release-2.0.0".to_string()));
    }

    #[test]
    fn tags_parsing_mixed_valid_invalid() {
        let latest = latest_tag_of(
            r#"[
                {"name": "v1.0.0"},
                {"name": "v2.0.0"},
                {"name": "invalid-tag"},
                {"name": "v1.5.0"}
            ]"#,
        );
        assert_eq!(latest, Some("v2.0.0".to_string()));
    }

    #[test]
    fn tags_parsing_empty() {
        assert_eq!(latest_tag_of(r#"[]"#), None);
    }

    #[test]
    fn tags_parsing_orders_prereleases_by_semver_precedence() {
        let latest = latest_tag_of(
            r#"[
                {"name": "v1.0.0"},
                {"name": "v2.0.0-beta.1"},
                {"name": "v1.5.0"}
            ]"#,
        );

        // `v2.0.0-beta.1` beats `v1.5.0` because semver precedence
        // compares major before flagging prerelease status; a higher
        // major wins even when the higher tag is a prerelease.
        assert_eq!(latest, Some("v2.0.0-beta.1".to_string()));
    }

    #[test]
    fn tags_parsing_handles_hl_prefixed_scheme_without_downgrade() {
        let latest = latest_tag_of(
            r#"[
                {"name": "hl0.21.0-1"},
                {"name": "hl0.33.0-1"},
                {"name": "hl0.46.0-1"},
                {"name": "hl0.47.0-1"}
            ]"#,
        );
        assert_eq!(latest, Some("hl0.47.0-1".to_string()));
    }

    #[test]
    fn tags_parsing_combined_prefixes() {
        let latest = latest_tag_of(
            r#"[
                {"name": "refs/tags/v1.0.0"},
                {"name": "refs/tags/v2.0.0"}
            ]"#,
        );
        assert_eq!(latest, Some("refs/tags/v2.0.0".to_string()));
    }

    #[test]
    fn tags_sort_by_semver_not_lex() {
        let latest = latest_tag_of(
            r#"[
                {"name": "v10.0.0"},
                {"name": "v2.0.0"},
                {"name": "v1.0.0"}
            ]"#,
        );
        assert_eq!(latest, Some("v10.0.0".to_string()));
    }

    #[test]
    fn http_client_has_explicit_timeouts() {
        // Without timeouts a hung TCP connect blocks the whole CLI.
        // The exact values are tunable but they must not be unset.
        let client = HttpClient::default();
        let timeouts = client.agent.config().timeouts();
        assert!(
            timeouts.connect.is_some(),
            "connect timeout must be set on the HTTP agent"
        );
        assert!(
            timeouts.recv_response.is_some(),
            "recv_response timeout must be set on the HTTP agent"
        );
    }

    #[test]
    fn forge_client_is_send_and_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<ForgeClient>();
    }

    #[test]
    fn parses_as_semver_recognizes_normalized_shapes() {
        // Pins the agreement between the cheap-path early-stop and
        // the `IntermediaryTags -> Tags` conversion. See the doc on
        // `parses_as_semver` for why divergence is unsound.
        assert!(parses_as_semver("v1.2.3"));
        assert!(parses_as_semver("refs/tags/v1.2.3"));
        assert!(parses_as_semver("release-1.2.3"));
        assert!(parses_as_semver("v1")); // normalized to v1.0.0
        assert!(parses_as_semver("1.0.0+gitea")); // build metadata is valid semver
        assert!(parses_as_semver("release-1.5")); // 2-segment pads to 1.5.0
        assert!(!parses_as_semver("invalid-tag"));
        assert!(!parses_as_semver("abc"));
        assert!(!parses_as_semver(""));
        // Leading-zero rejection: `release-25.05` normalizes to
        // `25.05.0`, which the semver crate rejects because `05` has
        // a leading zero. Channel-style year.month refs must NOT be
        // accepted by the cheap path: a flake using `release-25.05`
        // (or `release-24.05`) on a github.com repo would otherwise
        // be mis-classified as a semver tag and short-circuit the
        // tag walk against a window that does not contain it.
        assert!(!parses_as_semver("release-25.05"));
        assert!(!parses_as_semver("release-24.05"));
        // Prereleases must satisfy the predicate so that a page-1
        // listing of prerelease tags still trips the cheap-path
        // early-stop. Selection downstream picks a stable release
        // on the same page when one exists.
        assert!(parses_as_semver("v1.2.3-rc1"));
    }

    #[test]
    fn build_graphql_query_uses_distinct_aliases() {
        // GitHub GraphQL requires each `repository(...)` selection in
        // one document to use a unique alias. Two lookups that target
        // the same `(owner, repo)` must still get distinct aliases,
        // otherwise the POST is rejected before any response can be
        // demultiplexed.
        let lookups = vec![
            BatchLookup::Tags {
                owner: "same".into(),
                repo: "same".into(),
            },
            BatchLookup::Tags {
                owner: "same".into(),
                repo: "same".into(),
            },
        ];
        let (query, aliases) = build_graphql_query(&lookups);
        assert_eq!(aliases.len(), 2);
        assert_eq!(aliases[0].0, "r0");
        assert_eq!(aliases[1].0, "r1");
        assert!(
            query.contains("r0:"),
            "first lookup must use the r0 alias; query was:\n{query}"
        );
        assert!(
            query.contains("r1:"),
            "second lookup must use a distinct r1 alias; query was:\n{query}"
        );
    }
}