Skip to main content

omne_cli/
distro.rs

1//! Distro specifier parsing.
2//!
3//! Accepts four input forms and normalizes each to a strongly-typed
4//! `DistroSpec { org, repo }`:
5//!
//!   - bare name `omne-nosce` → `{ omne-org, omne-nosce }` (default org; trailing `.git` stripped)
7//!   - `<org>/<repo>` → `{ org, repo }` (trailing `.git` stripped)
8//!   - HTTPS URL `https://github.com/<org>/<repo>(.git)?`
9//!   - SSH URL `git@github.com:<org>/<repo>(.git)?`
10//!
//! Unlike the Python parser, the returned struct does not carry a URL:
//! downstream code (github.rs in Unit 4) keys the GitHub Releases API
//! on `(org, repo)`, so the URL was never actually used.
14//!
15//! **Fixes two Python bugs** (R12):
16//!   - `file://` URLs are rejected outright, not silently accepted.
17//!   - Non-github.com HTTPS/SSH hosts are rejected explicitly.
18
19#![allow(dead_code)]
20
21use thiserror::Error;
22
/// A distro reference reduced to its `(org, repo)` pair.
///
/// This is the only value `parse()` hands back on success; both fields
/// have already passed `validate_segment` by the time a caller sees them.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DistroSpec {
    /// Owning organisation or user (`omne-org` for bare-name specifiers).
    pub org: String,
    /// Repository name, with any trailing `.git` already removed.
    pub repo: String,
}
29
30/// Errors produced by `parse()`. Wrapped into `CliError::Distro` at the
31/// command boundary once Unit 8a wires distro parsing into `init::run`.
32#[derive(Debug, Error, PartialEq, Eq)]
33pub enum Error {
34    /// The specifier is not one of the four accepted forms, or it
35    /// names a scheme (`file://`) or host we deliberately refuse.
36    #[error("unsupported distro specifier `{spec}`: {reason}")]
37    UnsupportedSpec { spec: String, reason: &'static str },
38}
39
40/// Parse a distro specifier into `DistroSpec { org, repo }`.
41pub fn parse(spec: &str) -> Result<DistroSpec, Error> {
42    // file:// is rejected outright — the Python parser silently
43    // accepted these and produced no working end-to-end path (R12).
44    if spec.starts_with("file://") {
45        return Err(Error::UnsupportedSpec {
46            spec: spec.to_string(),
47            reason: "file:// URLs are not supported; use a GitHub org/repo specifier",
48        });
49    }
50
51    // HTTPS URL — only github.com.
52    if let Some(rest) = spec.strip_prefix("https://") {
53        let path = rest
54            .strip_prefix("github.com/")
55            .ok_or_else(|| Error::UnsupportedSpec {
56                spec: spec.to_string(),
57                reason: "only github.com HTTPS URLs are supported",
58            })?;
59        return parse_org_repo_path(spec, path);
60    }
61
62    // SSH URL — only github.com.
63    if let Some(rest) = spec.strip_prefix("git@") {
64        let path = rest
65            .strip_prefix("github.com:")
66            .ok_or_else(|| Error::UnsupportedSpec {
67                spec: spec.to_string(),
68                reason: "only git@github.com: SSH URLs are supported",
69            })?;
70        return parse_org_repo_path(spec, path);
71    }
72
73    if spec.is_empty() {
74        return Err(Error::UnsupportedSpec {
75            spec: spec.to_string(),
76            reason: "distro specifier must not be empty",
77        });
78    }
79
80    // `<org>/<repo>` shorthand.
81    if let Some((org, repo)) = spec.split_once('/') {
82        if org.is_empty() || repo.is_empty() || repo.contains('/') {
83            return Err(Error::UnsupportedSpec {
84                spec: spec.to_string(),
85                reason: "expected `<org>/<repo>` with no extra path components",
86            });
87        }
88        let repo = strip_git_suffix_required(repo, spec)?;
89        validate_segment(org, spec)?;
90        validate_segment(repo, spec)?;
91        return Ok(DistroSpec {
92            org: org.to_string(),
93            repo: repo.to_string(),
94        });
95    }
96
97    // Bare name — default to omne-org. The hardcoded `omne-org` org is
98    // safe by construction; only the user-supplied repo needs validation.
99    let repo = strip_git_suffix_required(spec, spec)?;
100    validate_segment(repo, spec)?;
101    Ok(DistroSpec {
102        org: "omne-org".to_string(),
103        repo: repo.to_string(),
104    })
105}
106
107/// Extract `<org>/<repo>` from the path portion of an HTTPS or SSH URL.
108/// Rejects empty org/repo and any URL with extra path components
109/// (`org/repo/extra`) — GitHub Releases only exist at the repo level.
110fn parse_org_repo_path(spec: &str, path: &str) -> Result<DistroSpec, Error> {
111    let (org, repo) = path.split_once('/').ok_or_else(|| Error::UnsupportedSpec {
112        spec: spec.to_string(),
113        reason: "expected `<org>/<repo>` in URL path",
114    })?;
115
116    if org.is_empty() || repo.is_empty() {
117        return Err(Error::UnsupportedSpec {
118            spec: spec.to_string(),
119            reason: "org and repo must be non-empty",
120        });
121    }
122
123    let repo = strip_git_suffix_required(repo, spec)?;
124
125    // Reject `org/repo/extra` — after stripping .git, the tail must
126    // be a single path segment.
127    if repo.contains('/') {
128        return Err(Error::UnsupportedSpec {
129            spec: spec.to_string(),
130            reason: "URL path must be exactly `<org>/<repo>(.git)?`",
131        });
132    }
133
134    validate_segment(org, spec)?;
135    validate_segment(repo, spec)?;
136
137    Ok(DistroSpec {
138        org: org.to_string(),
139        repo: repo.to_string(),
140    })
141}
142
/// Return `s` without one trailing `.git`, or `s` unchanged when it does
/// not end in `.git`. The match is case-sensitive and removes at most a
/// single suffix; an input of exactly `.git` yields the empty string.
fn strip_git_suffix(s: &str) -> &str {
    match s.strip_suffix(".git") {
        Some(stem) => stem,
        None => s,
    }
}
146
147/// Strip a trailing `.git` from a repo segment and reject the result if
148/// it is empty. Catches `.git`, `org/.git`, and URL paths ending in
149/// `.git` — all of which previously produced `DistroSpec { repo: "" }`
150/// because `strip_git_suffix(".git")` returned the empty string.
151fn strip_git_suffix_required<'a>(repo: &'a str, spec: &str) -> Result<&'a str, Error> {
152    let stripped = strip_git_suffix(repo);
153    if stripped.is_empty() {
154        return Err(Error::UnsupportedSpec {
155            spec: spec.to_string(),
156            reason: "repo name must not be empty after stripping `.git` suffix",
157        });
158    }
159    Ok(stripped)
160}
161
162/// Validate that a parsed `org` or `repo` segment is structurally safe
163/// to interpolate into URLs, command-line arguments, and filesystem
164/// paths. Called at every success path of `parse()` so the returned
165/// `DistroSpec` needs no further escaping at downstream call sites.
166///
167/// Enforces a **positive allowlist** matching GitHub's actual repo and
168/// org name charset (`[a-zA-Z0-9._-]`), plus three structural rules:
169///
170///   - non-empty (defense-in-depth — also caught earlier)
171///   - not literally `.` or `..` (path traversal / dot-segment)
172///   - does not start with `-` (CLI argument confusion when interpolated
173///     into `git clone` or similar)
174///
175/// `@` gets a more specific error message than the generic allowlist
176/// rejection because it is a plausible thing for users to type (the
177/// `@ref` version pin syntax) and we may add a `--ref` flag later.
178///
179/// The allowlist subsumes the older denylist of NUL, control chars,
180/// `%`, `/`, `\`, and additionally rejects URL query/fragment
181/// delimiters (`?`, `#`), whitespace, quotes, and other punctuation
182/// that would otherwise build invalid GitHub API URLs downstream.
183fn validate_segment(segment: &str, spec: &str) -> Result<(), Error> {
184    let bad = |reason: &'static str| Error::UnsupportedSpec {
185        spec: spec.to_string(),
186        reason,
187    };
188
189    if segment.is_empty() {
190        return Err(bad("segment must not be empty"));
191    }
192    if segment == "." || segment == ".." {
193        return Err(bad("segment must not be `.` or `..`"));
194    }
195    if segment.starts_with('-') {
196        return Err(bad("segment must not start with `-`"));
197    }
198    for c in segment.chars() {
199        if c == '@' {
200            return Err(bad(
201                "`@` is not supported in distro segments (no @ref version pins)",
202            ));
203        }
204        if !matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.') {
205            return Err(bad(
206                "segment may only contain ASCII alphanumerics, `-`, `_`, and `.`",
207            ));
208        }
209    }
210    Ok(())
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    // ----- Shared helpers -----

    /// Build the `DistroSpec` a successful parse is expected to yield.
    fn distro(org: &str, repo: &str) -> DistroSpec {
        DistroSpec {
            org: org.to_string(),
            repo: repo.to_string(),
        }
    }

    /// Parse `input`, panic with `why` if it is accepted, and return the
    /// specifier echoed inside the error.
    ///
    /// The `match` has no wildcard arm on purpose: adding an `Error`
    /// variant must force these tests to be revisited.
    fn rejected_spec(input: &str, why: &str) -> String {
        match parse(input).expect_err(why) {
            Error::UnsupportedSpec { spec, .. } => spec,
        }
    }

    /// Parse `input` and require only that it is rejected.
    fn assert_rejected(input: &str, why: &str) {
        let _ = rejected_spec(input, why);
    }

    // ----- Happy paths (port of `test_distro.py`) -----

    #[test]
    fn parse_bare_name_defaults_to_omne_org() {
        assert_eq!(
            parse("omne-faber").expect("bare name should parse"),
            distro("omne-org", "omne-faber")
        );
    }

    #[test]
    fn parse_full_spec_with_org() {
        assert_eq!(
            parse("omne-org/omne-faber").expect("org/repo should parse"),
            distro("omne-org", "omne-faber")
        );
    }

    #[test]
    fn parse_different_org() {
        assert_eq!(
            parse("acme-corp/omne-custom").expect("any org should parse"),
            distro("acme-corp", "omne-custom")
        );
    }

    #[test]
    fn parse_https_url() {
        assert_eq!(
            parse("https://github.com/myorg/my-distro.git").expect("github HTTPS URL should parse"),
            distro("myorg", "my-distro")
        );
    }

    #[test]
    fn parse_https_url_without_dotgit_suffix() {
        assert_eq!(
            parse("https://github.com/myorg/my-distro")
                .expect("github HTTPS URL without .git should parse"),
            distro("myorg", "my-distro")
        );
    }

    #[test]
    fn parse_ssh_url() {
        assert_eq!(
            parse("git@github.com:omne-org/omne-faber.git").expect("github SSH URL should parse"),
            distro("omne-org", "omne-faber")
        );
    }

    // ----- Edge: `.git` stripping -----

    #[test]
    fn parse_org_repo_strips_trailing_dotgit() {
        let parsed =
            parse("omne-org/omne-faber.git").expect("trailing .git on org/repo should strip");
        assert_eq!(parsed.repo, "omne-faber");
    }

    // ----- Error paths (new — guarding Python bugs) -----

    #[test]
    fn parse_file_url_is_rejected() {
        // The Python parser silently accepted these even though no
        // end-to-end path worked. R12 mandates hard rejection.
        assert_eq!(
            rejected_spec("file:///tmp/distro", "file:// should be rejected"),
            "file:///tmp/distro"
        );
    }

    #[test]
    fn parse_non_github_https_is_rejected() {
        // The Python parser implicitly assumed github.com because it
        // only built github URLs in the default path; any other HTTPS
        // host passed through and failed later in the release-fetch
        // step. Reject cleanly instead.
        assert_eq!(
            rejected_spec(
                "https://gitlab.com/org/repo",
                "non-github HTTPS host should be rejected"
            ),
            "https://gitlab.com/org/repo"
        );
    }

    #[test]
    fn parse_non_github_ssh_is_rejected() {
        assert_eq!(
            rejected_spec(
                "git@gitlab.com:org/repo.git",
                "non-github SSH host should be rejected"
            ),
            "git@gitlab.com:org/repo.git"
        );
    }

    #[test]
    fn parse_empty_string_is_rejected() {
        assert_rejected("", "empty specifier should be rejected");
    }

    #[test]
    fn parse_url_with_extra_path_components_is_rejected() {
        // `github.com/org/repo/extra` is not a valid distro spec —
        // GitHub Releases only exist at the repo level.
        assert_rejected(
            "https://github.com/myorg/my-distro/extra",
            "extra path components should be rejected",
        );
    }

    // ----- Edge: bare `.git` and `.git`-only repo segments -----
    //
    // Before the fix, `strip_git_suffix(".git")` returned `""` and that
    // empty string flowed into `DistroSpec { repo: "" }` from all three
    // call sites (bare name, org/repo shorthand, URL path). Each must
    // now surface `UnsupportedSpec` instead.

    #[test]
    fn parse_bare_dot_git_is_rejected() {
        assert_rejected(".git", "bare `.git` should be rejected");
    }

    #[test]
    fn parse_org_slash_dot_git_is_rejected() {
        assert_rejected("org/.git", "`org/.git` should be rejected");
    }

    #[test]
    fn parse_https_url_with_dot_git_only_repo_is_rejected() {
        assert_rejected(
            "https://github.com/org/.git",
            "`https://github.com/org/.git` should be rejected",
        );
    }

    // ----- Edge: structural segment validation (B2) -----
    //
    // Every parsed `org` and `repo` must be structurally safe to
    // interpolate into URLs, command-line arguments, and filesystem
    // paths. The pre-fix parser accepted dot-segments, `@ref`
    // version-pin syntax, control characters, NUL bytes,
    // percent-encoding, and a leading `-` — each either a security risk
    // (path traversal, header injection) or a source of CLI/argument
    // confusion at downstream call sites.

    #[test]
    fn parse_dotdot_org_segment_is_rejected() {
        assert_rejected("../evil", "`..` org segment should be rejected");
    }

    #[test]
    fn parse_at_ref_version_pin_is_rejected() {
        // `@ref` version pins are deferred to a future `--ref` flag.
        assert_rejected(
            "omne-nosce@v1.0",
            "`@ref` version pin should be rejected for now",
        );
    }

    #[test]
    fn parse_crlf_in_repo_segment_is_rejected() {
        assert_rejected("omne-org/repo\r\n", "CRLF in repo should be rejected");
    }

    #[test]
    fn parse_nul_in_repo_segment_is_rejected() {
        assert_rejected("omne-org/repo\0extra", "NUL in repo should be rejected");
    }

    #[test]
    fn parse_percent_encoding_in_url_segment_is_rejected() {
        assert_rejected(
            "https://github.com/%2E%2E/repo",
            "percent-encoded `..` should be rejected",
        );
    }

    #[test]
    fn parse_leading_dash_org_segment_is_rejected() {
        // Guards against CLI-argument confusion should `org` ever be
        // interpolated unquoted into a `git clone` invocation.
        assert_rejected("-invalid/repo", "leading `-` org segment should be rejected");
    }

    // Acceptance tests — guard against over-rejection by validate_segment.
    // GitHub allows dashes, underscores, dots, and numeric leading
    // characters in repo and org names; these must keep parsing cleanly.

    #[test]
    fn parse_repo_with_dashes_is_accepted() {
        let parsed = parse("omne-org/repo-with-dashes").expect("dashes in repo should be accepted");
        assert_eq!(parsed.repo, "repo-with-dashes");
    }

    #[test]
    fn parse_repo_with_underscores_is_accepted() {
        let parsed = parse("omne-org/repo_with_underscores")
            .expect("underscores in repo should be accepted");
        assert_eq!(parsed.repo, "repo_with_underscores");
    }

    #[test]
    fn parse_repo_with_internal_dots_is_accepted() {
        // Dots inside a segment are fine — only `.` and `..` as whole
        // segments are rejected.
        let parsed =
            parse("omne-org/repo.with.dots").expect("internal dots in repo should be accepted");
        assert_eq!(parsed.repo, "repo.with.dots");
    }

    #[test]
    fn parse_numeric_leading_org_and_repo_are_accepted() {
        let parsed = parse("123org/456repo").expect("numeric-leading names should be accepted");
        assert_eq!(parsed.org, "123org");
        assert_eq!(parsed.repo, "456repo");
    }

    // ----- Edge: positive-allowlist enforcement -----
    //
    // `validate_segment` originally used a denylist (control chars, NUL,
    // `%`, `/`, `\`, `@`, leading `-`). That left a gap: URL
    // query/fragment delimiters (`?`, `#`) and ASCII whitespace slipped
    // through and produced specs that looked valid but later built
    // invalid GitHub API URLs. The fix requires every char to be in
    // `[a-zA-Z0-9._-]` (GitHub's actual repo/org name charset).

    #[test]
    fn parse_url_with_query_string_is_rejected() {
        assert_rejected(
            "https://github.com/org/repo.git?ref=foo",
            "URL query string should be rejected",
        );
    }

    #[test]
    fn parse_url_with_fragment_is_rejected() {
        assert_rejected(
            "https://github.com/org/repo#frag",
            "URL fragment should be rejected",
        );
    }

    #[test]
    fn parse_segment_with_whitespace_is_rejected() {
        assert_rejected(
            "omne-org/repo with space",
            "whitespace in repo segment should be rejected",
        );
    }
}