Skip to main content

nix_uri/flakeref/
forge.rs

1use std::fmt::Display;
2
3use serde::{Deserialize, Serialize};
4use winnow::{
5    ModalResult, Parser,
6    combinator::{alt, cut_err, opt, preceded, separated_pair, terminated},
7    error::{ContextError, ErrMode, StrContext, StrContextValue},
8    token::take_till,
9};
10
11use crate::{
12    error::{NixUriError, NixUriResult, tag},
13    flakeref::{
14        RefLocation,
15        validators::{looks_like_rev, validate_ref_name},
16    },
17};
18
19/// Which git-forge scheme a `GitForge` reference uses. Spelled in the URL as
20/// the leading `github:`, `gitlab:`, or `sourcehut:` token; also drives the
21/// canonical-domain lookup in [`super::ForgeIdentity`].
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
23#[non_exhaustive]
24pub enum GitForgePlatform {
25    GitHub,
26    GitLab,
27    SourceHut,
28}
29
30/// A reference into a git forge (`github:`, `gitlab:`, `sourcehut:`).
31///
32/// Ref and rev are stored as separate typed slots; the parser splits a
33/// path-component value (`github:owner/repo/<x>`) into `rev` if `<x>` is
34/// 40-hex, otherwise into `ref_`. The
35/// `location` field records where the value would be rendered on `Display`,
36/// so round-trips preserve `?ref=` vs `/ref` form. The "no ref and no rev"
37/// state is encoded by both fields being `None`; `location` still defaults
38/// to `PathComponent` for that case.
39///
40/// `#[non_exhaustive]` reserves room for future fields (e.g. `host`,
41/// `submodules`) to land here without breaking match arms downstream.
42#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
43#[non_exhaustive]
44pub struct GitForge {
45    pub platform: GitForgePlatform,
46    pub owner: String,
47    pub repo: String,
48    pub ref_: Option<String>,
49    pub rev: Option<String>,
50    pub location: RefLocation,
51}
52
53impl GitForgePlatform {
54    /// Parses the gitforge platform token: `<github|gitlab|sourcehut>`.
55    #[allow(dead_code)]
56    pub(crate) fn parse(input: &mut &str) -> ModalResult<Self> {
57        alt((
58            tag("github").value(Self::GitHub),
59            tag("gitlab").value(Self::GitLab),
60            tag("sourcehut").value(Self::SourceHut),
61        ))
62        .parse_next(input)
63    }
64    #[allow(dead_code)]
65    pub(crate) fn parse_terminated(input: &mut &str) -> ModalResult<Self> {
66        terminated(
67            Self::parse,
68            ':'.context(StrContext::Expected(StrContextValue::CharLiteral(':'))),
69        )
70        .parse_next(input)
71    }
72}
73
74impl GitForge {
75    /// `<owner>/<repo>[/?#]`
76    fn parse_owner_repo<'i>(input: &mut &'i str) -> ModalResult<(&'i str, &'i str)> {
77        cut_err(separated_pair(
78            take_till(1.., |c: char| c == '/')
79                .context(StrContext::Label("TakeTill1"))
80                .context(StrContext::Label("owner")),
81            '/'.context(StrContext::Expected(StrContextValue::CharLiteral('/'))),
82            take_till(1.., |c: char| c == '/' || c == '?' || c == '#')
83                .context(StrContext::Label("TakeTill1"))
84                .context(StrContext::Label("repo")),
85        ))
86        .context(StrContext::Label("owner and repo"))
87        .parse_next(input)
88    }
89
90    /// `/[foobar]<?#>...` -> `Option<foobar>`; consumes the leading `/` and
91    /// optionally the trailing token before `?` / `#`.
92    fn parse_rev_ref<'i>(input: &mut &'i str) -> ModalResult<Option<&'i str>> {
93        preceded(
94            '/'.context(StrContext::Expected(StrContextValue::CharLiteral('/'))),
95            opt(take_till(1.., |c: char| c == '?' || c == '#')
96                .context(StrContext::Label("TakeTill1"))),
97        )
98        .parse_next(input)
99    }
100
101    /// `<owner>/<repo>[/[value]] -> (owner, repo, value)`. The trailing
102    /// `value`, when present, is classified by the caller into either
103    /// `ref_` or `rev` via [`super::validators::looks_like_rev`].
104    #[allow(dead_code)]
105    pub(crate) fn parse_owner_repo_ref<'i>(
106        input: &mut &'i str,
107    ) -> ModalResult<(&'i str, &'i str, Option<&'i str>)> {
108        let (owner, repo) = Self::parse_owner_repo(input)?;
109        let maybe_refrev = opt(Self::parse_rev_ref).parse_next(input)?;
110        Ok((owner, repo, maybe_refrev.flatten()))
111    }
112
113    #[allow(dead_code)]
114    pub(crate) fn parse(input: &mut &str) -> ModalResult<Self> {
115        let platform = terminated(
116            GitForgePlatform::parse,
117            ':'.context(StrContext::Expected(StrContextValue::CharLiteral(':'))),
118        )
119        .parse_next(input)?;
120        let (owner, repo, maybe_value) = Self::parse_owner_repo_ref(input)?;
121        let (ref_, rev) = match maybe_value {
122            Some(v) if looks_like_rev(v) => (None, Some(v.to_string())),
123            Some(v) if validate_ref_name(v) => (Some(v.to_string()), None),
124            Some(_) => return Err(ErrMode::Cut(ContextError::new())),
125            None => (None, None),
126        };
127        Ok(Self {
128            platform,
129            owner: owner.to_string(),
130            repo: repo.to_string(),
131            ref_,
132            rev,
133            location: RefLocation::PathComponent,
134        })
135    }
136}
137
/// Returns `true` when `c` belongs to `[a-zA-Z0-9._-]`.
///
/// That set is the strictest common alphabet across `github:`, `gitlab:`,
/// and `sourcehut:`. The one platform-specific carve-out, a leading `~` on a
/// `SourceHut` owner (e.g. `~misterio/nix-colors`), is not handled here.
fn is_owner_repo_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '.' | '_' | '-')
}
145
146/// Reject owner/repo strings that upstream Nix would accept syntactically
147/// but that no real forge would resolve. Lets parse-time errors stand in
148/// for the fetch-time failure a downstream consumer would otherwise see.
149///
150/// `owner` and `repo` are the post-percent-decode values the parser pulled
151/// out of the URL path; see [`super::fr_type::FlakeRefType::parse_type`]'s
152/// `GitForge` arm. A `gitlab:` owner is allowed to carry decoded `/` so
153/// nested-subgroup forms like `gitlab:veloren/dev/rfcs` (encoded
154/// `gitlab:veloren%2Fdev/rfcs` on the wire) round-trip; GitHub and
155/// `SourceHut` do not have a subgroup concept upstream, so the same input
156/// still rejects there.
157///
158/// The `field` name is `"owner"` or `"repo"` so consumers can pattern-match
159/// on which segment failed.
160pub(crate) fn validate_owner_repo(
161    platform: &GitForgePlatform,
162    owner: &str,
163    repo: &str,
164) -> NixUriResult<()> {
165    let owner_body = if matches!(platform, GitForgePlatform::SourceHut) {
166        owner.strip_prefix('~').unwrap_or(owner)
167    } else {
168        owner
169    };
170    if owner_body.is_empty() {
171        return Err(NixUriError::InvalidValue {
172            field: "owner",
173            reason: "owner must not be empty".to_string(),
174        });
175    }
176    if owner_body.contains('/') {
177        if !matches!(platform, GitForgePlatform::GitLab) {
178            return Err(NixUriError::InvalidValue {
179                field: "owner",
180                reason: "only gitlab owners may contain a '/' (subgroup form)".to_string(),
181            });
182        }
183        for segment in owner_body.split('/') {
184            if segment.is_empty() {
185                return Err(NixUriError::InvalidValue {
186                    field: "owner",
187                    reason: "subgroup owner must not have empty segments or leading/trailing '/'"
188                        .to_string(),
189                });
190            }
191            let first = segment.chars().next().unwrap();
192            if first == '-' || first == '.' {
193                return Err(NixUriError::InvalidValue {
194                    field: "owner",
195                    reason: "owner segment must not start with '-' or '.'".to_string(),
196                });
197            }
198            if !segment.chars().all(is_owner_repo_char) {
199                return Err(NixUriError::InvalidValue {
200                    field: "owner",
201                    reason: "owner segment contains a character outside [a-zA-Z0-9._-]".to_string(),
202                });
203            }
204        }
205    } else {
206        let owner_first = owner_body.chars().next().unwrap();
207        if owner_first == '-' || owner_first == '.' {
208            return Err(NixUriError::InvalidValue {
209                field: "owner",
210                reason: "owner must not start with '-' or '.'".to_string(),
211            });
212        }
213        if !owner_body.chars().all(is_owner_repo_char) {
214            return Err(NixUriError::InvalidValue {
215                field: "owner",
216                reason: "owner contains a character outside [a-zA-Z0-9._-]".to_string(),
217            });
218        }
219    }
220    if repo.is_empty() {
221        return Err(NixUriError::InvalidValue {
222            field: "repo",
223            reason: "repo must not be empty".to_string(),
224        });
225    }
226    if repo.starts_with('.') {
227        return Err(NixUriError::InvalidValue {
228            field: "repo",
229            reason: "repo must not start with '.'".to_string(),
230        });
231    }
232    if !repo.chars().all(is_owner_repo_char) {
233        return Err(NixUriError::InvalidValue {
234            field: "repo",
235            reason: "repo contains a character outside [a-zA-Z0-9._-]".to_string(),
236        });
237    }
238    Ok(())
239}
240
241impl Display for GitForgePlatform {
242    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
243        write!(
244            f,
245            "{}",
246            match self {
247                Self::GitHub => "github",
248                Self::GitLab => "gitlab",
249                Self::SourceHut => "sourcehut",
250            }
251        )
252    }
253}
254
#[cfg(test)]
mod inc_parse_platform {
    use super::*;

    /// Each platform token is recognized and the `:` is left behind by
    /// `parse` but consumed by `parse_terminated`.
    #[test]
    fn platform() {
        let cases = [
            ("github:nixos/nixpkgs", GitForgePlatform::GitHub),
            ("gitlab:nixos/nixpkgs", GitForgePlatform::GitLab),
            ("sourcehut:nixos/nixpkgs", GitForgePlatform::SourceHut),
        ];
        for (uri, expected) in cases {
            let (rest, platform) = GitForgePlatform::parse.parse_peek(uri).unwrap();
            assert_eq!(rest, ":nixos/nixpkgs");
            assert_eq!(platform, expected);
        }

        let (rest, platform) = GitForgePlatform::parse_terminated
            .parse_peek("github:nixos/nixpkgs")
            .unwrap();
        assert_eq!(rest, "nixos/nixpkgs");
        assert_eq!(platform, GitForgePlatform::GitHub);
        // TODO?: fuzz test where `:` is preceded by bad string
    }
}
287
#[cfg(test)]
mod err_msgs {
    use cool_asserts::assert_matches;

    use crate::{
        FlakeRef, FlakeRefType, GitForge, GitForgePlatform, NixUriError, ParseExpected,
        parser::parse_nix_uri,
    };

    /// Asserts that parsing `$uri` fails with `NixUriError::InvalidValue`
    /// whose `field` is the literal `$field`.
    macro_rules! assert_invalid_field {
        ($uri:expr, $field:literal) => {
            assert_matches!(
                parse_nix_uri($uri),
                Err(NixUriError::InvalidValue { field: $field, .. })
            )
        };
    }

    /// An owner without a `/` is reported at the end of the input as a
    /// missing `/`.
    #[test]
    fn just_owner_public_surface() {
        assert_matches!(
            parse_nix_uri("github:owner"),
            Err(NixUriError::Parse {
                position: 12,
                expected: ParseExpected::Char('/'),
            })
        );
    }

    #[test]
    fn whitespace_in_owner_rejected() {
        assert_invalid_field!("github:bad owner/repo", "owner");
    }

    #[test]
    fn whitespace_in_repo_rejected() {
        assert_invalid_field!("github:owner/bad repo", "repo");
    }

    #[test]
    fn leading_dot_owner_rejected() {
        assert_invalid_field!("github:.dotted/repo", "owner");
    }

    #[test]
    fn leading_dash_owner_rejected() {
        assert_invalid_field!("github:-dashed/repo", "owner");
    }

    #[test]
    fn leading_dot_repo_rejected() {
        assert_invalid_field!("github:owner/.dotrepo", "repo");
    }

    #[test]
    fn special_char_in_owner_rejected() {
        assert_invalid_field!("github:bad!owner/repo", "owner");
    }

    #[test]
    fn tilde_in_github_owner_rejected() {
        assert_invalid_field!("github:~tilde/repo", "owner");
    }

    #[test]
    fn tilde_only_in_sourcehut_owner() {
        // SourceHut owner permits a leading `~` (e.g. `~misterio`).
        parse_nix_uri("sourcehut:~owner/repo").expect("sourcehut owner with `~` is valid");
        // GitLab and GitHub do not.
        assert!(parse_nix_uri("gitlab:~owner/repo").is_err());
        assert!(parse_nix_uri("github:~owner/repo").is_err());
    }

    #[test]
    fn valid_forms_still_accepted() {
        let accepted = [
            "github:nixos/nixpkgs",
            "github:nix.os/nix-pkgs",
            "github:n_ix/r_epo",
            "github:o-1/r.2",
            "gitlab:owner/repo",
            "sourcehut:nixos/nixpkgs",
            "sourcehut:~misterio/nix-colors",
        ];
        for uri in accepted {
            parse_nix_uri(uri).unwrap_or_else(|e| panic!("expected {uri:?} to parse, got {e}"));
        }
    }

    /// GitLab nested subgroups are written as
    /// `gitlab:veloren%2Fdev/rfcs` in Nix. The parser percent-decodes
    /// each path segment after splitting on the literal `/`, so `%2F`
    /// survives as a literal `/` inside the owner without colliding
    /// with the owner-vs-repo boundary.
    #[test]
    fn gitlab_subgroup_percent_decode() {
        let uri = "gitlab:veloren%2Fdev/rfcs";
        let parsed: FlakeRef = parse_nix_uri(uri).expect("subgroup form should parse");
        match parsed.kind() {
            FlakeRefType::GitForge(GitForge {
                platform: GitForgePlatform::GitLab,
                owner,
                repo,
                ..
            }) => {
                assert_eq!(owner, "veloren/dev");
                assert_eq!(repo, "rfcs");
            }
            other => panic!("expected GitLab GitForge, got {other:?}"),
        }
        // Display re-encodes the `/` in owner so the wire form matches the input
        // and the round-trip stays byte-stable.
        assert_eq!(parsed.to_string(), uri);
    }

    /// More than one encoded `/` in the owner decodes and round-trips too.
    #[test]
    fn gitlab_deep_subgroup() {
        let uri = "gitlab:o%2Fp%2Fq/r";
        let parsed = parse_nix_uri(uri).expect("deep subgroup should parse");
        match parsed.kind() {
            FlakeRefType::GitForge(g) => {
                assert_eq!(g.owner, "o/p/q");
                assert_eq!(g.repo, "r");
            }
            other => panic!("expected GitForge, got {other:?}"),
        }
        assert_eq!(parsed.to_string(), uri);
    }

    /// A subgroup owner combines with a `?ref=` query parameter.
    #[test]
    fn gitlab_subgroup_with_ref_query() {
        let parsed =
            parse_nix_uri("gitlab:o%2Fp/r?ref=main").expect("ref-query subgroup should parse");
        match parsed.kind() {
            FlakeRefType::GitForge(g) => {
                assert_eq!(g.owner, "o/p");
                assert_eq!(g.repo, "r");
                assert_eq!(g.ref_.as_deref(), Some("main"));
            }
            other => panic!("expected GitForge, got {other:?}"),
        }
    }

    /// A subgroup owner combines with a rev given as a path component.
    #[test]
    fn gitlab_subgroup_with_path_rev() {
        let rev = "0123456789abcdef0123456789abcdef01234567";
        let parsed = parse_nix_uri(&format!("gitlab:o%2Fp/r/{rev}"))
            .expect("path-rev subgroup should parse");
        match parsed.kind() {
            FlakeRefType::GitForge(g) => {
                assert_eq!(g.owner, "o/p");
                assert_eq!(g.rev.as_deref(), Some(rev));
                assert_eq!(g.ref_, None);
            }
            other => panic!("expected GitForge, got {other:?}"),
        }
    }

    #[test]
    fn gitlab_leading_slash_in_owner_rejected() {
        assert_invalid_field!("gitlab:%2Fp/r", "owner");
    }

    #[test]
    fn gitlab_trailing_slash_in_owner_rejected() {
        assert_invalid_field!("gitlab:p%2F/r", "owner");
    }

    #[test]
    fn gitlab_empty_subgroup_segment_rejected() {
        assert_invalid_field!("gitlab:p%2F%2Fq/r", "owner");
    }

    /// GitHub has no subgroup concept and `validate_owner_repo` is the
    /// parse-time stand-in for the fetch-time failure a downstream
    /// consumer would otherwise see, so reject the subgroup form here
    /// even though Nix accepts it syntactically (Nix applies the same
    /// path-segment decode regardless of forge); the platform gate is a
    /// nix-uri-only safety.
    #[test]
    fn github_subgroup_owner_rejected() {
        assert_invalid_field!("github:o%2Fp/r", "owner");
    }

    #[test]
    fn sourcehut_subgroup_owner_rejected() {
        assert_invalid_field!("sourcehut:o%2Fp/r", "owner");
    }
}
518
#[cfg(test)]
mod inc_parse {
    use super::*;

    /// Runs `GitForge::parse_owner_repo_ref` on `input` and returns the
    /// unconsumed remainder together with the parsed triple, panicking on a
    /// parse failure.
    #[track_caller]
    fn peek(input: &str) -> (&str, (&str, &str, Option<&str>)) {
        GitForge::parse_owner_repo_ref.parse_peek(input).unwrap()
    }

    /// Bare `owner/repo` consumes the whole input.
    #[test]
    fn plain() {
        let (rest, parsed) = peek("owner/repo");
        assert_eq!(rest, "");
        assert_eq!(("owner", "repo", None), parsed);
    }

    /// Parsing stops in front of `?` and `#`, leaving them in the remainder.
    #[test]
    fn param_terminated() {
        let (rest, parsed) = peek("owner/repo?🤡");
        assert_eq!(rest, "?🤡");
        assert_eq!(("owner", "repo", None), parsed);

        let (rest, parsed) = peek("owner/repo#🤡");
        assert_eq!(("owner", "repo", None), parsed);
        assert_eq!(rest, "#🤡");

        let (rest, parsed) = peek("owner/repo?#🤡");
        assert_eq!(("owner", "repo", None), parsed);
        assert_eq!(rest, "?#🤡");
    }

    /// A `#attr` suffix is left untouched.
    #[test]
    fn attr_terminated() {
        let (rest, parsed) = peek("owner/repo#fizz.bar");
        assert_eq!(rest, "#fizz.bar");
        assert_eq!(("owner", "repo", None), parsed);
    }

    /// A trailing path value is captured up to the `?`.
    #[test]
    fn rev_param_terminated() {
        let (rest, parsed) = peek("owner/repo/rev?foo=bar");
        assert_eq!(rest, "?foo=bar");
        assert_eq!(("owner", "repo", Some("rev")), parsed);
    }

    /// A trailing path value is captured up to the `#`.
    #[test]
    fn rev_attr_terminated() {
        let (rest, parsed) = peek("owner/repo/rev#fizz.bar");
        assert_eq!(rest, "#fizz.bar");
        assert_eq!(("owner", "repo", Some("rev")), parsed);
    }
}