Skip to main content

pakx_core/
validation.rs

1//! Shape guards for untrusted strings that ride in URL path segments
2//! against the pakx-registry backend.
3//!
4//! The registry's RFC 3986 minimal percent-encoder
5//! (`urlencoding_minimal` in the registry client + `commands/info.rs`)
6//! leaves `.` in the unreserved set per the spec — which means a string
7//! of literally `..` produces a URL with a literal `..` segment that a
8//! normalizing reverse proxy (CDN, ALB, nginx with `merge_slashes off`)
9//! collapses upward, silently re-routing the call to the wrong
10//! endpoint. The encoder is doing the right thing; we need a separate
11//! shape guard on every input that lands inside a URL path segment
12//! before encoding.
13//!
14//! Two guards live here:
15//!
16//! - [`validate_package_name`] — for `<name>` segments (and reused via
17//!   the registry client's own copy of the same logic).
18//! - [`validate_version`] — for `<version>` segments. Stricter than the
19//!   name guard because semver versions have a well-defined character
20//!   set (`[a-zA-Z0-9._+-]{1,64}` covers exact pins, build metadata,
21//!   and pre-release tags).
22//!
23//! Both share the same error type so callers can route either through
24//! a single `match` arm in the CLI's error rendering.
25
26use std::fmt;
27
/// Error produced when a string fails one of the URL-path-segment shape
/// guards in this module.
///
/// It pairs the rejected input with the rule that was broken, so the CLI
/// can tell the user both *what* was refused and *why*.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// The rejected string, kept verbatim so it can be echoed back in the
    /// rendered error.
    pub input: String,
    /// Static, human-readable description of the rule the input broke.
    pub reason: &'static str,
}

impl fmt::Display for ValidationError {
    /// Renders as `invalid "<input>": <reason>`.
    ///
    /// The input is formatted through `Debug`, so it is quoted and any
    /// control characters show up as visible escapes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "invalid {:?}: {}", self.input, self.reason)
    }
}

// No underlying cause to expose: the default `source()` (None) is correct.
impl std::error::Error for ValidationError {}
53
54/// Reject hostile package names before they reach the URL builder.
55///
56/// `urlencoding_minimal` follows RFC 3986 §2.3 and leaves `.` in the
57/// unreserved set, so a name like `..` produces a URL segment with a
58/// literal `..` — which most HTTP routers (and any normalizing
59/// reverse-proxy in front of the registry) collapse, silently
60/// re-routing the request to an unintended endpoint. The encoder is
61/// doing the right thing for `.`; we need a shape guard on the input.
62///
63/// Rejection rules:
64/// - empty
65/// - exactly `.` or `..`
66/// - starts with `.` (hidden-file convention; nothing legit needs it)
67/// - contains the substring `..` anywhere
68/// - contains `/`, `\`, or any ASCII control char
69///
70/// Used by the publish / unpublish path-segment builders; the registry
71/// client's `pakx_backend` module wraps this guard in a backend-
72/// specific error variant.
73pub fn validate_package_name(name: &str) -> Result<(), ValidationError> {
74    let reject = |reason: &'static str| ValidationError {
75        input: name.to_owned(),
76        reason,
77    };
78    if name.is_empty() {
79        return Err(reject("name must not be empty"));
80    }
81    if name == "." || name == ".." {
82        return Err(reject("name must not be `.` or `..`"));
83    }
84    if name.starts_with('.') {
85        return Err(reject("name must not start with `.`"));
86    }
87    if name.contains("..") {
88        return Err(reject("name must not contain `..`"));
89    }
90    for c in name.chars() {
91        if c == '/' || c == '\\' {
92            return Err(reject("name must not contain `/` or `\\`"));
93        }
94        if c.is_control() {
95            return Err(reject("name must not contain control characters"));
96        }
97    }
98    Ok(())
99}
100
/// Upper bound on the length of a version string accepted by
/// [`validate_version`].
///
/// Semver itself sets no limit (pre-release and build-metadata tags may
/// be arbitrarily long), but a version that lands in a URL path segment
/// realistically stays well under 64 characters. Anything longer is
/// either a typo or an injection probe, and refusing it loudly is safer
/// than silently sending megabyte-long paths to the registry.
pub const MAX_VERSION_LEN: usize = 64;
109
110/// Reject hostile version pins before they reach the URL builder.
111///
112/// Same threat model as [`validate_package_name`]: an unencoded `..`
113/// segment normalises away under a CDN. The version's allowed
114/// character set is well-defined (it's the union of what semver
115/// accepts: alphanumerics, dot, dash, plus, underscore — see
116/// <https://semver.org>) so we can apply a positive whitelist on top
117/// of the `..`-traversal rejection that names get.
118///
119/// Rejection rules:
120/// - empty
121/// - longer than [`MAX_VERSION_LEN`] characters
122/// - exactly `.` or `..`
123/// - starts with `.` (the empty-segment-then-traversal trick)
124/// - starts with `-` (would land in `clap`-style argument parsing on
125///   any shell tooling that consumes the version downstream)
126/// - contains the substring `..` anywhere
127/// - any character outside `[A-Za-z0-9._+-]`
128///
129/// Notably permits `+` (semver build metadata, e.g. `1.0.0+build.7`),
130/// `~` is **not** permitted (would let a `~user/...` traversal slip
131/// through if anyone ever concatenated this segment into a path on the
132/// CLI side).
133pub fn validate_version(version: &str) -> Result<(), ValidationError> {
134    let reject = |reason: &'static str| ValidationError {
135        input: version.to_owned(),
136        reason,
137    };
138    if version.is_empty() {
139        return Err(reject("version must not be empty"));
140    }
141    if version.len() > MAX_VERSION_LEN {
142        return Err(reject("version exceeds 64-character limit"));
143    }
144    if version == "." || version == ".." {
145        return Err(reject("version must not be `.` or `..`"));
146    }
147    if version.starts_with('.') {
148        return Err(reject("version must not start with `.`"));
149    }
150    if version.starts_with('-') {
151        return Err(reject("version must not start with `-`"));
152    }
153    if version.contains("..") {
154        return Err(reject("version must not contain `..`"));
155    }
156    for c in version.chars() {
157        let ok = c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '+' | '-');
158        if !ok {
159            return Err(reject(
160                "version must match [A-Za-z0-9._+-] (semver-friendly set)",
161            ));
162        }
163    }
164    Ok(())
165}
166
#[cfg(test)]
mod tests {
    //! Unit tests for the path-segment shape guards. Each documented
    //! rejection rule has at least one case here, and the tests that pin
    //! an exact `reason` string guard the user-facing wording.

    use super::*;

    #[test]
    fn name_accepts_plain() {
        assert!(validate_package_name("foo").is_ok());
        assert!(validate_package_name("foo-bar_baz.qux").is_ok());
    }

    #[test]
    fn name_rejects_empty_and_single_dot() {
        assert_eq!(
            validate_package_name("").unwrap_err().reason,
            "name must not be empty"
        );
        assert_eq!(
            validate_package_name(".").unwrap_err().reason,
            "name must not be `.` or `..`"
        );
    }

    #[test]
    fn name_rejects_double_dot() {
        assert_eq!(
            validate_package_name("..").unwrap_err().reason,
            "name must not be `.` or `..`"
        );
    }

    #[test]
    fn name_rejects_embedded_traversal() {
        assert!(validate_package_name("foo..bar").is_err());
        assert!(validate_package_name("foo/../bar").is_err());
    }

    #[test]
    fn name_rejects_leading_dot_and_slash() {
        assert!(validate_package_name(".hidden").is_err());
        assert!(validate_package_name("a/b").is_err());
        assert!(validate_package_name("a\\b").is_err());
    }

    #[test]
    fn name_rejects_control_chars() {
        assert_eq!(
            validate_package_name("foo\nbar").unwrap_err().reason,
            "name must not contain control characters"
        );
        assert!(validate_package_name("foo\u{0}").is_err());
        assert!(validate_package_name("foo\tbar").is_err());
    }

    #[test]
    fn error_echoes_input_and_renders_reason() {
        let err = validate_package_name("a/b").unwrap_err();
        assert_eq!(err.input, "a/b");
        assert_eq!(
            err.to_string(),
            "invalid \"a/b\": name must not contain `/` or `\\`"
        );
    }

    #[test]
    fn version_accepts_exact_semver() {
        assert!(validate_version("0.1.0").is_ok());
        assert!(validate_version("1.0.0-rc.1").is_ok());
        assert!(validate_version("1.0.0+build.7").is_ok());
        assert!(validate_version("v1.0.0").is_ok());
        // Lowercased alphanumeric pre-release identifiers.
        assert!(validate_version("0.1.0-alpha.2").is_ok());
    }

    #[test]
    fn version_rejects_empty_and_double_dot() {
        assert_eq!(
            validate_version("").unwrap_err().reason,
            "version must not be empty"
        );
        assert_eq!(
            validate_version("..").unwrap_err().reason,
            "version must not be `.` or `..`"
        );
        assert!(validate_version("../etc").is_err());
        assert!(validate_version("1..0").is_err());
    }

    #[test]
    fn version_rejects_single_dot() {
        assert_eq!(
            validate_version(".").unwrap_err().reason,
            "version must not be `.` or `..`"
        );
    }

    #[test]
    fn version_rejects_leading_dot_or_dash() {
        assert!(validate_version(".5").is_err());
        // A leading `-` would be picked up as a flag by any downstream
        // shell-arg tooling that pastes the version into a command.
        assert!(validate_version("-1.0.0").is_err());
    }

    #[test]
    fn version_rejects_disallowed_chars() {
        assert!(validate_version("1.0.0 ").is_err());
        assert!(validate_version("1.0.0/x").is_err());
        assert!(validate_version("1.0.0%2F").is_err());
        // `~` is deliberately NOT permitted — it's a path-tilde-expansion
        // marker in any shell that walks the version into a filesystem
        // path downstream. (`urlencoding_minimal` leaves it unencoded
        // per RFC 3986, so it would reach the wire as `~`.)
        assert!(validate_version("1.0.0~rc").is_err());
        // Single-byte control char.
        assert!(validate_version("1.0.0\n").is_err());
        // Non-ASCII letters are outside the whitelist too.
        assert!(validate_version("1.0.0-é").is_err());
    }

    #[test]
    fn version_rejects_overlong_input() {
        let too_long: String = "1".repeat(MAX_VERSION_LEN + 1);
        assert_eq!(
            validate_version(&too_long).unwrap_err().reason,
            "version exceeds 64-character limit"
        );
        // Exactly at the cap is OK.
        let at_cap: String = "1".repeat(MAX_VERSION_LEN);
        assert!(validate_version(&at_cap).is_ok());
    }
}
255}