pakx_core/validation.rs
1//! Shape guards for untrusted strings that ride in URL path segments
2//! against the pakx-registry backend.
3//!
4//! The registry's RFC 3986 minimal percent-encoder
5//! (`urlencoding_minimal` in the registry client + `commands/info.rs`)
6//! leaves `.` in the unreserved set per the spec — which means a string
7//! of literally `..` produces a URL with a literal `..` segment that a
8//! normalizing reverse proxy (CDN, ALB, nginx with `merge_slashes off`)
9//! collapses upward, silently re-routing the call to the wrong
10//! endpoint. The encoder is doing the right thing; we need a separate
11//! shape guard on every input that lands inside a URL path segment
12//! before encoding.
13//!
14//! Two guards live here:
15//!
16//! - [`validate_package_name`] — for `<name>` segments (and reused via
17//! the registry client's own copy of the same logic).
//! - [`validate_version`] — for `<version>` segments. Stricter than the
//!   name guard because version strings draw from a small, well-defined
//!   character set: `[a-zA-Z0-9._+-]{1,64}` covers exact pins, build
//!   metadata, and pre-release tags. (The `_` and the 64-character cap
//!   are this module's own choices; semver itself defines neither.)
22//!
23//! Both share the same error type so callers can route either through
24//! a single `match` arm in the CLI's error rendering.
25
26use std::fmt;
27
/// Error returned when a string is refused as a URL path segment.
///
/// Pairs the rejected input with a fixed explanation so the CLI can
/// show both — the input alone would not tell the user *why* it was
/// refused.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ValidationError {
    /// The rejected string, verbatim. Echoed in the rendered error so
    /// the user sees exactly which input was refused.
    pub input: String,
    /// Human-friendly explanation of the failure.
    pub reason: &'static str,
}

impl fmt::Display for ValidationError {
    /// Renders as `invalid "<input>": <reason>`.
    ///
    /// The input is written with its `Debug` formatting, so it appears
    /// quoted and with special characters escaped rather than raw.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "invalid {:?}: {}", self.input, self.reason)
    }
}

// Default trait methods only: the error wraps no underlying cause.
impl std::error::Error for ValidationError {}
53
54/// Reject hostile package names before they reach the URL builder.
55///
56/// `urlencoding_minimal` follows RFC 3986 §2.3 and leaves `.` in the
57/// unreserved set, so a name like `..` produces a URL segment with a
58/// literal `..` — which most HTTP routers (and any normalizing
59/// reverse-proxy in front of the registry) collapse, silently
60/// re-routing the request to an unintended endpoint. The encoder is
61/// doing the right thing for `.`; we need a shape guard on the input.
62///
63/// Rejection rules:
64/// - empty
65/// - exactly `.` or `..`
66/// - starts with `.` (hidden-file convention; nothing legit needs it)
67/// - contains the substring `..` anywhere
68/// - contains `/`, `\`, or any ASCII control char
69///
70/// Used by the publish / unpublish path-segment builders; the registry
71/// client's `pakx_backend` module wraps this guard in a backend-
72/// specific error variant.
73pub fn validate_package_name(name: &str) -> Result<(), ValidationError> {
74 let reject = |reason: &'static str| ValidationError {
75 input: name.to_owned(),
76 reason,
77 };
78 if name.is_empty() {
79 return Err(reject("name must not be empty"));
80 }
81 if name == "." || name == ".." {
82 return Err(reject("name must not be `.` or `..`"));
83 }
84 if name.starts_with('.') {
85 return Err(reject("name must not start with `.`"));
86 }
87 if name.contains("..") {
88 return Err(reject("name must not contain `..`"));
89 }
90 for c in name.chars() {
91 if c == '/' || c == '\\' {
92 return Err(reject("name must not contain `/` or `\\`"));
93 }
94 if c.is_control() {
95 return Err(reject("name must not contain control characters"));
96 }
97 }
98 Ok(())
99}
100
/// Upper bound on the length of a version string accepted by
/// [`validate_version`].
///
/// Semver itself sets no cap — pre-release and build-metadata tags may
/// be arbitrarily long — but a real version path segment stays well
/// under 64 characters. Anything longer is either a typo or an
/// injection probe, and rejecting it loudly is safer than silently
/// routing megabyte-long paths to the registry.
pub const MAX_VERSION_LEN: usize = 64;
109
110/// Reject hostile version pins before they reach the URL builder.
111///
112/// Same threat model as [`validate_package_name`]: an unencoded `..`
113/// segment normalises away under a CDN. The version's allowed
114/// character set is well-defined (it's the union of what semver
115/// accepts: alphanumerics, dot, dash, plus, underscore — see
116/// <https://semver.org>) so we can apply a positive whitelist on top
117/// of the `..`-traversal rejection that names get.
118///
119/// Rejection rules:
120/// - empty
121/// - longer than [`MAX_VERSION_LEN`] characters
122/// - exactly `.` or `..`
123/// - starts with `.` (the empty-segment-then-traversal trick)
124/// - starts with `-` (would land in `clap`-style argument parsing on
125/// any shell tooling that consumes the version downstream)
126/// - contains the substring `..` anywhere
127/// - any character outside `[A-Za-z0-9._+-]`
128///
129/// Notably permits `+` (semver build metadata, e.g. `1.0.0+build.7`),
130/// `~` is **not** permitted (would let a `~user/...` traversal slip
131/// through if anyone ever concatenated this segment into a path on the
132/// CLI side).
133pub fn validate_version(version: &str) -> Result<(), ValidationError> {
134 let reject = |reason: &'static str| ValidationError {
135 input: version.to_owned(),
136 reason,
137 };
138 if version.is_empty() {
139 return Err(reject("version must not be empty"));
140 }
141 if version.len() > MAX_VERSION_LEN {
142 return Err(reject("version exceeds 64-character limit"));
143 }
144 if version == "." || version == ".." {
145 return Err(reject("version must not be `.` or `..`"));
146 }
147 if version.starts_with('.') {
148 return Err(reject("version must not start with `.`"));
149 }
150 if version.starts_with('-') {
151 return Err(reject("version must not start with `-`"));
152 }
153 if version.contains("..") {
154 return Err(reject("version must not contain `..`"));
155 }
156 for c in version.chars() {
157 let ok = c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '+' | '-');
158 if !ok {
159 return Err(reject(
160 "version must match [A-Za-z0-9._+-] (semver-friendly set)",
161 ));
162 }
163 }
164 Ok(())
165}
166
/// Unit tests for the path-segment shape guards.
///
/// Where a test pins a `reason` string, it is also pinning which rule
/// fires first for that input.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn name_accepts_plain() {
        assert!(validate_package_name("foo").is_ok());
        assert!(validate_package_name("foo-bar_baz.qux").is_ok());
    }

    #[test]
    fn name_rejects_empty_and_single_dot() {
        assert_eq!(
            validate_package_name("").unwrap_err().reason,
            "name must not be empty"
        );
        assert_eq!(
            validate_package_name(".").unwrap_err().reason,
            "name must not be `.` or `..`"
        );
    }

    #[test]
    fn name_rejects_double_dot() {
        assert_eq!(
            validate_package_name("..").unwrap_err().reason,
            "name must not be `.` or `..`"
        );
    }

    #[test]
    fn name_rejects_embedded_traversal() {
        assert!(validate_package_name("foo..bar").is_err());
        assert!(validate_package_name("foo/../bar").is_err());
    }

    #[test]
    fn name_rejects_leading_dot_and_slash() {
        assert!(validate_package_name(".hidden").is_err());
        assert!(validate_package_name("a/b").is_err());
        assert!(validate_package_name("a\\b").is_err());
    }

    #[test]
    fn name_reports_specific_reasons() {
        assert_eq!(
            validate_package_name(".hidden").unwrap_err().reason,
            "name must not start with `.`"
        );
        assert_eq!(
            validate_package_name("foo..bar").unwrap_err().reason,
            "name must not contain `..`"
        );
        assert_eq!(
            validate_package_name("a/b").unwrap_err().reason,
            "name must not contain `/` or `\\`"
        );
    }

    #[test]
    fn name_rejects_control_chars() {
        assert_eq!(
            validate_package_name("foo\nbar").unwrap_err().reason,
            "name must not contain control characters"
        );
        assert!(validate_package_name("foo\0").is_err());
        // DEL.
        assert!(validate_package_name("foo\u{7f}").is_err());
        // A control character outside the ASCII range (NEL, U+0085).
        assert!(validate_package_name("foo\u{85}").is_err());
    }

    #[test]
    fn error_echoes_input_and_renders_reason() {
        let err = validate_package_name("..").unwrap_err();
        assert_eq!(err.input, "..");
        assert_eq!(
            err.to_string(),
            "invalid \"..\": name must not be `.` or `..`"
        );
    }

    #[test]
    fn version_accepts_exact_semver() {
        assert!(validate_version("0.1.0").is_ok());
        assert!(validate_version("1.0.0-rc.1").is_ok());
        assert!(validate_version("1.0.0+build.7").is_ok());
        assert!(validate_version("v1.0.0").is_ok());
        // Lowercased alphanumeric pre-release identifiers.
        assert!(validate_version("0.1.0-alpha.2").is_ok());
    }

    #[test]
    fn version_accepts_underscore() {
        // `_` is in the allow-list even though semver does not use it.
        assert!(validate_version("1.0.0_1").is_ok());
    }

    #[test]
    fn version_rejects_empty_and_double_dot() {
        assert_eq!(
            validate_version("").unwrap_err().reason,
            "version must not be empty"
        );
        assert_eq!(
            validate_version("..").unwrap_err().reason,
            "version must not be `.` or `..`"
        );
        assert!(validate_version("../etc").is_err());
        assert!(validate_version("1..0").is_err());
    }

    #[test]
    fn version_rejects_leading_dot_or_dash() {
        assert!(validate_version(".5").is_err());
        // A leading `-` would be picked up as a flag by any downstream
        // shell-arg tooling that pastes the version into a command.
        assert!(validate_version("-1.0.0").is_err());
    }

    #[test]
    fn version_rejects_disallowed_chars() {
        assert!(validate_version("1.0.0 ").is_err());
        assert!(validate_version("1.0.0/x").is_err());
        assert!(validate_version("1.0.0%2F").is_err());
        // `~` is deliberately NOT permitted — it's a path-tilde-expansion
        // marker in any shell that walks the version into a filesystem
        // path downstream. (`urlencoding_minimal` leaves it unencoded
        // per RFC 3986, so it would reach the wire as `~`.)
        assert!(validate_version("1.0.0~rc").is_err());
        // Single-byte control char.
        assert!(validate_version("1.0.0\n").is_err());
    }

    #[test]
    fn version_rejects_non_ascii() {
        // The allow-list is ASCII-only: Unicode letters and digits that
        // `char::is_alphanumeric` would accept must still be refused.
        assert_eq!(
            validate_version("1.0.0-\u{e9}").unwrap_err().reason,
            "version must match [A-Za-z0-9._+-] (semver-friendly set)"
        );
        // Fullwidth digit one (U+FF11).
        assert!(validate_version("\u{ff11}.0.0").is_err());
    }

    #[test]
    fn version_rejects_overlong_input() {
        let too_long: String = "1".repeat(MAX_VERSION_LEN + 1);
        assert_eq!(
            validate_version(&too_long).unwrap_err().reason,
            "version exceeds 64-character limit"
        );
        // Exactly at the cap is OK.
        let at_cap: String = "1".repeat(MAX_VERSION_LEN);
        assert!(validate_version(&at_cap).is_ok());
    }
}
255}