Skip to main content

ferro_oci_server/
reference.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Repository-name and reference parsing.
3//!
4//! Spec: OCI Distribution Spec v1.1 §2 "Definitions".
5//!
6//! - A repository `<name>` matches
7//!   `[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?(?:/[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?)*`
8//!   with a total length <= 255 characters.
9//! - A `<reference>` is either a tag (matching
10//!   `[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`) or a digest (`<algo>:<hex>`).
11//!
12//! The conformance suite exercises the edge cases (double underscores,
13//! leading uppercase, names ending with `-`) so we enforce the regex
14//! manually rather than rely on `regex::Regex` (which would pull an
15//! extra dep).
16
17use std::fmt;
18use std::str::FromStr;
19
20use ferro_blob_store::Digest;
21
22use crate::error::{OciError, OciErrorCode};
23
/// Maximum total length of a repository name (spec §2).
///
/// [`validate_name`] compares this against the name's length in bytes and
/// rejects anything longer. Every character the name grammar accepts is
/// ASCII, so for valid names the byte count equals the character count.
pub const MAX_NAME_LENGTH: usize = 255;

/// Maximum length of a tag reference (spec §2: 128 characters).
///
/// This is one leading character plus up to 127 following characters, per
/// the tag grammar `[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`.
pub const MAX_TAG_LENGTH: usize = 128;
29
30/// Validate a repository name against the OCI Distribution Spec v1.1
31/// name grammar.
32///
33/// # Errors
34///
35/// Returns an [`OciError`] with code `NAME_INVALID` if the name violates
36/// any part of the grammar.
37pub fn validate_name(name: &str) -> Result<(), OciError> {
38    if name.is_empty() {
39        return Err(OciError::new(
40            OciErrorCode::NameInvalid,
41            "repository name must not be empty",
42        ));
43    }
44    if name.len() > MAX_NAME_LENGTH {
45        return Err(OciError::new(
46            OciErrorCode::NameInvalid,
47            format!("repository name exceeds {MAX_NAME_LENGTH} characters"),
48        ));
49    }
50
51    // The name grammar is a `/`-joined sequence of path components.
52    // Each component matches
53    //     [a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?
54    // which reduces to: starts with [a-z0-9]+, ends with [a-z0-9]+,
55    // and any internal run of separators is one of `.`, `_`, `__`, or
56    // one-or-more `-`.
57    for component in name.split('/') {
58        validate_component(component)
59            .map_err(|msg| OciError::new(OciErrorCode::NameInvalid, msg))?;
60    }
61    Ok(())
62}
63
64fn validate_component(component: &str) -> Result<(), String> {
65    if component.is_empty() {
66        return Err("path component must not be empty".to_owned());
67    }
68    let bytes = component.as_bytes();
69    // Must start with an alphanumeric.
70    if !is_alnum(bytes[0]) {
71        return Err(format!("component `{component}` must start with [a-z0-9]"));
72    }
73    // Must end with an alphanumeric.
74    if !is_alnum(bytes[bytes.len() - 1]) {
75        return Err(format!("component `{component}` must end with [a-z0-9]"));
76    }
77
78    // Walk the component. Between alphanumeric runs, the separator
79    // must be one of: `.`, `_`, `__`, or `-+`.
80    let mut i = 0;
81    while i < bytes.len() {
82        let c = bytes[i];
83        if is_alnum(c) {
84            i += 1;
85            continue;
86        }
87        // Separator run.
88        let start = i;
89        while i < bytes.len() && !is_alnum(bytes[i]) {
90            i += 1;
91        }
92        let sep = &component[start..i];
93        if !is_valid_separator(sep) {
94            return Err(format!(
95                "component `{component}` contains invalid separator `{sep}`"
96            ));
97        }
98    }
99    Ok(())
100}
101
/// True when `b` is an ASCII digit or an ASCII lowercase letter, i.e. a
/// member of the `[a-z0-9]` class used by the repository-name grammar.
const fn is_alnum(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z')
}
105
/// True when `s` is an acceptable separator between two alphanumeric runs
/// of a path component: exactly `.`, `_`, or `__`, or a non-empty run
/// consisting solely of `-`.
fn is_valid_separator(s: &str) -> bool {
    match s {
        "." | "_" | "__" => true,
        // Remaining legal form: one or more dashes and nothing else.
        _ => !s.is_empty() && s.bytes().all(|b| b == b'-'),
    }
}
113
114/// Validate a tag string.
115///
116/// Spec §2: `[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`.
117fn is_valid_tag(tag: &str) -> bool {
118    if tag.is_empty() || tag.len() > MAX_TAG_LENGTH {
119        return false;
120    }
121    let bytes = tag.as_bytes();
122    let first_ok = bytes[0].is_ascii_alphanumeric() || bytes[0] == b'_';
123    if !first_ok {
124        return false;
125    }
126    bytes[1..]
127        .iter()
128        .all(|&b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'-'))
129}
130
/// A parsed manifest reference: either a tag or a digest.
///
/// Values are normally produced by parsing a string through this type's
/// `FromStr` implementation, which decides between the two variants.
/// The `Display` implementation writes the tag text or the digest back out.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Reference {
    /// Human-readable tag (e.g. `latest`, `v1.2.3`).
    Tag(String),
    /// Content-addressed digest.
    Digest(Digest),
}
139
140impl Reference {
141    /// True when this reference is a tag.
142    #[must_use]
143    pub const fn is_tag(&self) -> bool {
144        matches!(self, Self::Tag(_))
145    }
146
147    /// True when this reference is a digest.
148    #[must_use]
149    pub const fn is_digest(&self) -> bool {
150        matches!(self, Self::Digest(_))
151    }
152
153    /// Borrow the digest, if this reference is one.
154    #[must_use]
155    pub const fn as_digest(&self) -> Option<&Digest> {
156        match self {
157            Self::Digest(d) => Some(d),
158            Self::Tag(_) => None,
159        }
160    }
161
162    /// Borrow the tag string, if this reference is one.
163    #[must_use]
164    pub fn as_tag(&self) -> Option<&str> {
165        match self {
166            Self::Tag(t) => Some(t.as_str()),
167            Self::Digest(_) => None,
168        }
169    }
170}
171
172impl fmt::Display for Reference {
173    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174        match self {
175            Self::Tag(t) => f.write_str(t),
176            Self::Digest(d) => fmt::Display::fmt(d, f),
177        }
178    }
179}
180
181impl FromStr for Reference {
182    type Err = OciError;
183
184    fn from_str(s: &str) -> Result<Self, Self::Err> {
185        // Digest references always contain a `:` separator followed by
186        // a known algorithm prefix. Tag references do not contain `:`.
187        if let Some((algo, _hex)) = s.split_once(':') {
188            if algo == "sha256" || algo == "sha512" {
189                let d: Digest = s.parse().map_err(|e: ferro_blob_store::DigestParseError| {
190                    OciError::new(OciErrorCode::DigestInvalid, e.to_string())
191                })?;
192                return Ok(Self::Digest(d));
193            }
194            // A `:` without a known algorithm is an invalid reference.
195            return Err(OciError::new(
196                OciErrorCode::ManifestInvalid,
197                format!("invalid reference: `{s}`"),
198            ));
199        }
200        if !is_valid_tag(s) {
201            return Err(OciError::new(
202                OciErrorCode::ManifestInvalid,
203                format!("invalid tag: `{s}`"),
204            ));
205        }
206        Ok(Self::Tag(s.to_owned()))
207    }
208}
209
#[cfg(test)]
mod tests {
    // Unit tests for repository-name validation and reference parsing.
    // Besides the happy paths, these pin the boundaries of the grammar:
    // exact length limits, separator-run rules, and the tag character set.
    use super::{Reference, validate_name};

    #[test]
    fn simple_single_component_name_is_valid() {
        assert!(validate_name("alpine").is_ok());
    }

    #[test]
    fn nested_path_name_is_valid() {
        assert!(validate_name("library/alpine").is_ok());
        assert!(validate_name("my-org/sub-project/app").is_ok());
    }

    #[test]
    fn underscore_and_dot_and_dash_separators_are_valid() {
        assert!(validate_name("foo_bar").is_ok());
        assert!(validate_name("foo__bar").is_ok());
        assert!(validate_name("foo.bar").is_ok());
        assert!(validate_name("foo-bar").is_ok());
        assert!(validate_name("foo---bar").is_ok());
    }

    #[test]
    fn malformed_separator_runs_are_rejected() {
        // Only `.`, `_`, `__`, and runs of `-` are legal between
        // alphanumeric runs; anything longer or mixed is not.
        assert!(validate_name("foo___bar").is_err());
        assert!(validate_name("foo..bar").is_err());
        assert!(validate_name("foo._bar").is_err());
        assert!(validate_name("foo-_bar").is_err());
        assert!(validate_name("foo-.bar").is_err());
    }

    #[test]
    fn empty_name_is_rejected() {
        let err = validate_name("").expect_err("empty name invalid");
        assert_eq!(err.code.as_str(), "NAME_INVALID");
    }

    #[test]
    fn uppercase_is_rejected() {
        let err = validate_name("Alpine").expect_err("uppercase invalid");
        assert_eq!(err.code.as_str(), "NAME_INVALID");
    }

    #[test]
    fn uppercase_in_later_component_is_rejected() {
        assert!(validate_name("library/Alpine").is_err());
        assert!(validate_name("library/alPine").is_err());
    }

    #[test]
    fn leading_separator_is_rejected() {
        assert!(validate_name("-alpine").is_err());
        assert!(validate_name(".alpine").is_err());
        assert!(validate_name("_alpine").is_err());
    }

    #[test]
    fn trailing_separator_is_rejected() {
        assert!(validate_name("alpine-").is_err());
        assert!(validate_name("alpine.").is_err());
        assert!(validate_name("alpine_").is_err());
    }

    #[test]
    fn empty_component_is_rejected() {
        assert!(validate_name("foo//bar").is_err());
        assert!(validate_name("/foo").is_err());
        assert!(validate_name("foo/").is_err());
    }

    #[test]
    fn name_at_length_limit_is_accepted() {
        // Exactly 255 bytes is still within the limit.
        let s = "a".repeat(255);
        assert!(validate_name(&s).is_ok());
    }

    #[test]
    fn too_long_name_is_rejected() {
        let s = "a".repeat(256);
        assert!(validate_name(&s).is_err());
    }

    #[test]
    fn tag_reference_parses() {
        let r: Reference = "v1.2.3-rc1".parse().expect("tag parse");
        assert!(r.is_tag());
        assert!(!r.is_digest());
        assert_eq!(r.as_tag(), Some("v1.2.3-rc1"));
        assert!(r.as_digest().is_none());
    }

    #[test]
    fn tag_reference_displays_as_its_text() {
        let r: Reference = "latest".parse().expect("tag parse");
        assert_eq!(r.to_string(), "latest");
    }

    #[test]
    fn tag_may_start_with_underscore_or_uppercase() {
        assert!("_private".parse::<Reference>().is_ok());
        assert!("Release".parse::<Reference>().is_ok());
    }

    #[test]
    fn tag_with_bad_first_character_is_rejected() {
        assert!(".hidden".parse::<Reference>().is_err());
        assert!("-dash".parse::<Reference>().is_err());
    }

    #[test]
    fn tag_with_illegal_character_is_rejected() {
        assert!("has space".parse::<Reference>().is_err());
        assert!("v1+build".parse::<Reference>().is_err());
    }

    #[test]
    fn empty_reference_is_rejected() {
        assert!("".parse::<Reference>().is_err());
    }

    #[test]
    fn tag_length_limit_is_enforced() {
        // 128 characters is the longest legal tag; 129 is one too many.
        assert!("a".repeat(128).parse::<Reference>().is_ok());
        assert!("a".repeat(129).parse::<Reference>().is_err());
    }

    #[test]
    fn digest_reference_parses() {
        let digest = format!("sha256:{}", "a".repeat(64));
        let r: Reference = digest.parse().expect("digest parse");
        assert!(r.is_digest());
        assert!(!r.is_tag());
        assert!(r.as_digest().is_some());
        assert_eq!(r.as_tag(), None);
        assert_eq!(r.to_string(), digest);
    }

    #[test]
    fn bad_digest_reference_is_rejected() {
        // Known algorithm prefix + wrong hex length.
        assert!("sha256:beef".parse::<Reference>().is_err());
    }

    #[test]
    fn tag_with_colon_is_rejected_as_invalid_reference() {
        // Unknown "algo" prefix before the colon.
        assert!("some:weird".parse::<Reference>().is_err());
    }
}