// oci_spec/distribution/reference.rs

use std::fmt;
use std::str::FromStr;
use std::{convert::TryFrom, sync::OnceLock};

use regex::{Regex, RegexBuilder};
use serde::{Deserialize, Serialize};
use thiserror::Error;

/// NAME_TOTAL_LENGTH_MAX is the maximum total number of characters in a repository name.
const NAME_TOTAL_LENGTH_MAX: usize = 255;

/// Legacy Docker Hub domain; `split_domain` normalizes it to `DOCKER_HUB_DOMAIN`.
const DOCKER_HUB_DOMAIN_LEGACY: &str = "index.docker.io";
/// Domain used when a reference does not name a registry.
const DOCKER_HUB_DOMAIN: &str = "docker.io";
/// Namespace prepended to single-component repository names on Docker Hub.
const DOCKER_HUB_OFFICIAL_REPO_NAME: &str = "library";
/// Tag assigned when a parsed reference carries neither a tag nor a digest.
const DEFAULT_TAG: &str = "latest";
/// REFERENCE_REGEXP is the full supported format of a reference. The regexp
/// is anchored and has capturing groups for name, tag, and digest components.
const REFERENCE_REGEXP: &str = r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])(?:(?:\.(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]))+)?(?::[0-9]+)?/)?[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?(?:(?:/[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?)+)?)(?::([\w][\w.-]{0,127}))?(?:@([A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}))?$";

20fn reference_regexp() -> &'static Regex {
21    static RE: OnceLock<Regex> = OnceLock::new();
22    RE.get_or_init(|| {
23        RegexBuilder::new(REFERENCE_REGEXP)
24            .size_limit(10 * (1 << 21))
25            .build()
26            .unwrap()
27    })
28}
29
30/// Reasons that parsing a string as a Reference can fail.
31#[derive(Debug, Error, PartialEq, Eq)]
32pub enum ParseError {
33    /// Will be returned if digest is ill-formed
34    #[error("invalid checksum digest format")]
35    DigestInvalidFormat,
36    /// Will be returned if digest does not have a correct length
37    #[error("invalid checksum digest length")]
38    DigestInvalidLength,
39    /// Will be returned for an unknown digest algorithm
40    #[error("unsupported digest algorithm")]
41    DigestUnsupported,
42    /// Will be returned for an uppercase character in repository name
43    #[error("repository name must be lowercase")]
44    NameContainsUppercase,
45    /// Will be returned if a name is empty
46    #[error("repository name must have at least one component")]
47    NameEmpty,
48    /// Will be returned if a name is too long
49    #[error("repository name must not be more than {NAME_TOTAL_LENGTH_MAX} characters")]
50    NameTooLong,
51    /// Will be returned if a reference is ill-formed
52    #[error("invalid reference format")]
53    ReferenceInvalidFormat,
54    /// Will be returned if a tag is ill-formed
55    #[error("invalid tag format")]
56    TagInvalidFormat,
57}
58
59/// Reference provides a general type to represent any way of referencing images within an OCI registry.
60///
61/// # Examples
62///
63/// Parsing a tagged image reference:
64///
65/// ```
66/// use oci_spec::distribution::Reference;
67///
68/// let reference: Reference = "docker.io/library/hello-world:latest".parse().unwrap();
69///
70/// assert_eq!("docker.io/library/hello-world:latest", reference.whole().as_str());
71/// assert_eq!("docker.io", reference.registry());
72/// assert_eq!("library/hello-world", reference.repository());
73/// assert_eq!(Some("latest"), reference.tag());
74/// assert_eq!(None, reference.digest());
75/// ```
76#[derive(Clone, Hash, PartialEq, Eq, Debug, Serialize, Deserialize)]
77pub struct Reference {
78    registry: String,
79    #[serde(skip_serializing_if = "Option::is_none")]
80    mirror_registry: Option<String>,
81    repository: String,
82    #[serde(skip_serializing_if = "Option::is_none")]
83    tag: Option<String>,
84    #[serde(skip_serializing_if = "Option::is_none")]
85    digest: Option<String>,
86}
87
88impl Reference {
89    /// Create a Reference with a registry, repository and tag.
90    pub fn with_tag(registry: String, repository: String, tag: String) -> Self {
91        Self {
92            registry,
93            mirror_registry: None,
94            repository,
95            tag: Some(tag),
96            digest: None,
97        }
98    }
99
100    /// Create a Reference with a registry, repository and digest.
101    pub fn with_digest(registry: String, repository: String, digest: String) -> Self {
102        Self {
103            registry,
104            mirror_registry: None,
105            repository,
106            tag: None,
107            digest: Some(digest),
108        }
109    }
110
111    /// Create a new instance of [`Reference`] with a registry, repository, tag and digest.
112    ///
113    /// This is useful when you need to reference an image by both its semantic version (tag)
114    /// and its content-addressable digest for immutability.
115    ///
116    /// # Examples
117    ///
118    /// ```
119    /// use oci_spec::distribution::Reference;
120    ///
121    /// let reference = Reference::with_tag_and_digest(
122    ///     "docker.io".to_string(),
123    ///     "library/nginx".to_string(),
124    ///     "1.21".to_string(),
125    ///     "sha256:abc123...".to_string(),
126    /// );
127    /// ```
128    pub fn with_tag_and_digest(
129        registry: String,
130        repository: String,
131        tag: String,
132        digest: String,
133    ) -> Self {
134        Self {
135            registry,
136            mirror_registry: None,
137            repository,
138            tag: Some(tag),
139            digest: Some(digest),
140        }
141    }
142
143    /// Clone the Reference for the same image with a new digest.
144    pub fn clone_with_digest(&self, digest: String) -> Self {
145        Self {
146            registry: self.registry.clone(),
147            mirror_registry: self.mirror_registry.clone(),
148            repository: self.repository.clone(),
149            tag: None,
150            digest: Some(digest),
151        }
152    }
153
154    /// Set a pull mirror registry for this reference.
155    ///
156    /// The mirror registry will be used to resolve the image, the original registry
157    /// is available via the [`Reference::namespace`] function.
158    ///
159    /// The original registry will be sent with the `ns` query parameter to the mirror registry.
160    /// The `ns` query parameter is currently not part of the stable OCI Distribution Spec yet,
161    /// but is being discussed to be added and is already used by some other implementations
162    /// (for example containerd). So be aware that this feature might not work with all registries.
163    ///
164    /// Since this is not part of the stable OCI Distribution Spec yet, this feature is exempt from
165    /// semver backwards compatibility guarantees and might change in the future.
166    #[doc(hidden)]
167    pub fn set_mirror_registry(&mut self, registry: String) {
168        self.mirror_registry = Some(registry);
169    }
170
171    /// Resolve the registry address of a given `Reference`.
172    ///
173    /// Some registries, such as docker.io, uses a different address for the actual
174    /// registry. This function implements such redirection.
175    ///
176    /// If a mirror registry is set, it will be used instead of the original registry.
177    pub fn resolve_registry(&self) -> &str {
178        match (self.registry(), self.mirror_registry.as_deref()) {
179            (_, Some(mirror_registry)) => mirror_registry,
180            ("docker.io", None) => "index.docker.io",
181            (registry, None) => registry,
182        }
183    }
184
185    /// Returns the name of the registry.
186    pub fn registry(&self) -> &str {
187        &self.registry
188    }
189
190    /// Returns the name of the repository.
191    pub fn repository(&self) -> &str {
192        &self.repository
193    }
194
195    /// Returns the object's tag, if present.
196    pub fn tag(&self) -> Option<&str> {
197        self.tag.as_deref()
198    }
199
200    /// Returns the object's digest, if present.
201    pub fn digest(&self) -> Option<&str> {
202        self.digest.as_deref()
203    }
204
205    /// Returns the original registry when pulled via a mirror.
206    ///
207    /// Since this is not part of the stable OCI Distribution Spec yet, this feature is exempt from
208    /// semver backwards compatibility guarantees and might change in the future.
209    #[doc(hidden)]
210    pub fn namespace(&self) -> Option<&str> {
211        if self.mirror_registry.is_some() {
212            Some(self.registry())
213        } else {
214            None
215        }
216    }
217
218    /// Returns the whole reference.
219    pub fn whole(&self) -> String {
220        self.to_string()
221    }
222}
223
224impl fmt::Display for Reference {
225    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
226        let mut not_empty = false;
227        if !self.registry().is_empty() {
228            write!(f, "{}", self.registry())?;
229            not_empty = true;
230        }
231        if !self.repository().is_empty() {
232            if not_empty {
233                write!(f, "/")?;
234            }
235            write!(f, "{}", self.repository())?;
236            not_empty = true;
237        }
238        if let Some(t) = self.tag() {
239            if not_empty {
240                write!(f, ":")?;
241            }
242            write!(f, "{t}")?;
243            not_empty = true;
244        }
245        if let Some(d) = self.digest() {
246            if not_empty {
247                write!(f, "@")?;
248            }
249            write!(f, "{d}")?;
250        }
251        Ok(())
252    }
253}
254
255impl FromStr for Reference {
256    type Err = ParseError;
257
258    fn from_str(s: &str) -> Result<Self, Self::Err> {
259        Reference::try_from(s)
260    }
261}
262
263impl TryFrom<&str> for Reference {
264    type Error = ParseError;
265
266    fn try_from(s: &str) -> Result<Self, Self::Error> {
267        if s.is_empty() {
268            return Err(ParseError::NameEmpty);
269        }
270        let captures = match reference_regexp().captures(s) {
271            Some(caps) => caps,
272            None => {
273                return Err(ParseError::ReferenceInvalidFormat);
274            }
275        };
276        let name = &captures[1];
277        let mut tag = captures.get(2).map(|m| m.as_str().to_owned());
278        let digest = captures.get(3).map(|m| m.as_str().to_owned());
279        if tag.is_none() && digest.is_none() {
280            tag = Some(DEFAULT_TAG.into());
281        }
282        let (registry, repository) = split_domain(name);
283        let reference = Reference {
284            registry,
285            mirror_registry: None,
286            repository,
287            tag,
288            digest,
289        };
290        if reference.repository().len() > NAME_TOTAL_LENGTH_MAX {
291            return Err(ParseError::NameTooLong);
292        }
293        // Digests much always be hex-encoded, ensuring that their hex portion will always be
294        // size*2
295        if let Some(digest) = reference.digest() {
296            match digest.split_once(':') {
297                None => return Err(ParseError::DigestInvalidFormat),
298                Some(("sha256", digest)) => {
299                    if digest.len() != 64 {
300                        return Err(ParseError::DigestInvalidLength);
301                    }
302                }
303                Some(("sha384", digest)) => {
304                    if digest.len() != 96 {
305                        return Err(ParseError::DigestInvalidLength);
306                    }
307                }
308                Some(("sha512", digest)) => {
309                    if digest.len() != 128 {
310                        return Err(ParseError::DigestInvalidLength);
311                    }
312                }
313                Some((_, _)) => return Err(ParseError::DigestUnsupported),
314            }
315        }
316        Ok(reference)
317    }
318}
319
320impl TryFrom<String> for Reference {
321    type Error = ParseError;
322    fn try_from(string: String) -> Result<Self, Self::Error> {
323        TryFrom::try_from(string.as_str())
324    }
325}
326
327impl From<Reference> for String {
328    fn from(reference: Reference) -> Self {
329        reference.whole()
330    }
331}
332
333/// Splits a repository name to domain and remotename string.
334/// If no valid domain is found, the default domain is used. Repository name
335/// needs to be already validated before.
336///
337/// This function is a Rust rewrite of the official Go code used by Docker:
338/// https://github.com/distribution/distribution/blob/41a0452eea12416aaf01bceb02a924871e964c67/reference/normalize.go#L87-L104
339fn split_domain(name: &str) -> (String, String) {
340    let mut domain: String;
341    let mut remainder: String;
342
343    match name.split_once('/') {
344        None => {
345            domain = DOCKER_HUB_DOMAIN.into();
346            remainder = name.into();
347        }
348        Some((left, right)) => {
349            if !(left.contains('.') || left.contains(':')) && left != "localhost" {
350                domain = DOCKER_HUB_DOMAIN.into();
351                remainder = name.into();
352            } else {
353                domain = left.into();
354                remainder = right.into();
355            }
356        }
357    }
358    if domain == DOCKER_HUB_DOMAIN_LEGACY {
359        domain = DOCKER_HUB_DOMAIN.into();
360    }
361    if domain == DOCKER_HUB_DOMAIN && !remainder.contains('/') {
362        remainder = format!("{DOCKER_HUB_OFFICIAL_REPO_NAME}/{remainder}");
363    }
364
365    (domain, remainder)
366}
367
#[cfg(test)]
mod test {
    use super::*;

    mod parse {
        use super::*;
        use rstest::rstest;

        // Inputs that must parse, together with the expected normalized components.
        #[rstest(input, registry, repository, tag, digest, whole,
            case("busybox", "docker.io", "library/busybox", Some("latest"), None, "docker.io/library/busybox:latest"),
            case("test.com:tag", "docker.io", "library/test.com", Some("tag"), None, "docker.io/library/test.com:tag"),
            case("test.com:5000", "docker.io", "library/test.com", Some("5000"), None, "docker.io/library/test.com:5000"),
            case("test.com/repo:tag", "test.com", "repo", Some("tag"), None, "test.com/repo:tag"),
            case("test:5000/repo", "test:5000", "repo", Some("latest"), None, "test:5000/repo:latest"),
            case("test:5000/repo:tag", "test:5000", "repo", Some("tag"), None, "test:5000/repo:tag"),
            case("test:5000/repo@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", "test:5000", "repo", None, Some("sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), "test:5000/repo@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
            case("test:5000/repo:tag@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", "test:5000", "repo", Some("tag"), Some("sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), "test:5000/repo:tag@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
            case("lowercase:Uppercase", "docker.io", "library/lowercase", Some("Uppercase"), None, "docker.io/library/lowercase:Uppercase"),
            case("sub-dom1.foo.com/bar/baz/quux", "sub-dom1.foo.com", "bar/baz/quux", Some("latest"), None, "sub-dom1.foo.com/bar/baz/quux:latest"),
            case("sub-dom1.foo.com/bar/baz/quux:some-long-tag", "sub-dom1.foo.com", "bar/baz/quux", Some("some-long-tag"), None, "sub-dom1.foo.com/bar/baz/quux:some-long-tag"),
            case("b.gcr.io/test.example.com/my-app:test.example.com", "b.gcr.io", "test.example.com/my-app", Some("test.example.com"), None, "b.gcr.io/test.example.com/my-app:test.example.com"),
            // ☃.com in punycode
            case("xn--n3h.com/myimage:xn--n3h.com", "xn--n3h.com", "myimage", Some("xn--n3h.com"), None, "xn--n3h.com/myimage:xn--n3h.com"),
            // 🐳.com in punycode
            case("xn--7o8h.com/myimage:xn--7o8h.com@sha512:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", "xn--7o8h.com", "myimage", Some("xn--7o8h.com"), Some("sha512:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), "xn--7o8h.com/myimage:xn--7o8h.com@sha512:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
            case("foo_bar.com:8080", "docker.io", "library/foo_bar.com", Some("8080"), None, "docker.io/library/foo_bar.com:8080" ),
            case("foo/foo_bar.com:8080", "docker.io", "foo/foo_bar.com", Some("8080"), None, "docker.io/foo/foo_bar.com:8080"),
            case("opensuse/leap:15.3", "docker.io", "opensuse/leap", Some("15.3"), None, "docker.io/opensuse/leap:15.3"),
        )]
        fn parse_good_reference(
            input: &str,
            registry: &str,
            repository: &str,
            tag: Option<&str>,
            digest: Option<&str>,
            whole: &str,
        ) {
            println!("input: {input}");
            let reference = Reference::try_from(input).expect("could not parse reference");
            println!("{input} -> {reference:?}");
            assert_eq!(registry, reference.registry());
            assert_eq!(repository, reference.repository());
            assert_eq!(tag, reference.tag());
            assert_eq!(digest, reference.digest());
            assert_eq!(whole, reference.whole());
        }

        // Inputs that must be rejected, together with the exact error expected.
        #[rstest(input, err,
            case("", ParseError::NameEmpty),
            case(":justtag", ParseError::ReferenceInvalidFormat),
            case("@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", ParseError::ReferenceInvalidFormat),
            case("repo@sha256:ffffffffffffffffffffffffffffffffff", ParseError::DigestInvalidLength),
            case("validname@invaliddigest:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", ParseError::DigestUnsupported),
            // FIXME: should really pass a ParseError::NameContainsUppercase, but "invalid format" is good enough for now.
            case("Uppercase:tag", ParseError::ReferenceInvalidFormat),
            // FIXME: "Uppercase" is incorrectly handled as a domain-name here, and therefore passes.
            // https://github.com/docker/distribution/blob/master/reference/reference_test.go#L104-L109
            // case("Uppercase/lowercase:tag", ParseError::NameContainsUppercase),
            // FIXME: should really pass a ParseError::NameContainsUppercase, but "invalid format" is good enough for now.
            case("test:5000/Uppercase/lowercase:tag", ParseError::ReferenceInvalidFormat),
            case("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ParseError::NameTooLong),
            case("aa/asdf$$^/aa", ParseError::ReferenceInvalidFormat)
        )]
        fn parse_bad_reference(input: &str, err: ParseError) {
            assert_eq!(Reference::try_from(input).unwrap_err(), err)
        }

        // Checks registry resolution before and after a pull mirror is configured.
        #[rstest(
            input,
            registry,
            resolved_registry,
            whole,
            case(
                "busybox",
                "docker.io",
                "index.docker.io",
                "docker.io/library/busybox:latest"
            ),
            case("test.com/repo:tag", "test.com", "test.com", "test.com/repo:tag"),
            case("test:5000/repo", "test:5000", "test:5000", "test:5000/repo:latest"),
            case(
                "sub-dom1.foo.com/bar/baz/quux",
                "sub-dom1.foo.com",
                "sub-dom1.foo.com",
                "sub-dom1.foo.com/bar/baz/quux:latest"
            ),
            case(
                "b.gcr.io/test.example.com/my-app:test.example.com",
                "b.gcr.io",
                "b.gcr.io",
                "b.gcr.io/test.example.com/my-app:test.example.com"
            )
        )]
        fn test_mirror_registry(input: &str, registry: &str, resolved_registry: &str, whole: &str) {
            let mut reference = Reference::try_from(input).expect("could not parse reference");
            assert_eq!(resolved_registry, reference.resolve_registry());
            assert_eq!(registry, reference.registry());
            assert_eq!(None, reference.namespace());
            assert_eq!(whole, reference.whole());

            // Setting a mirror changes only the resolved registry; the original
            // registry stays visible through `registry()` and `namespace()`.
            reference.set_mirror_registry("docker.mirror.io".to_owned());
            assert_eq!("docker.mirror.io", reference.resolve_registry());
            assert_eq!(registry, reference.registry());
            assert_eq!(Some(registry), reference.namespace());
            assert_eq!(whole, reference.whole());
        }

        // Checks that a reference built from both a tag and a digest renders both.
        #[rstest(
            expected, registry, repository, tag, digest,
            case(
                "docker.io/foo/bar:1.2@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
                "docker.io",
                "foo/bar",
                "1.2",
                "sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
            )
        )]
        fn test_create_reference_from_tag_and_digest(
            expected: &str,
            registry: &str,
            repository: &str,
            tag: &str,
            digest: &str,
        ) {
            let reference = Reference::with_tag_and_digest(
                registry.to_string(),
                repository.to_string(),
                tag.to_string(),
                digest.to_string(),
            );
            assert_eq!(expected, reference.to_string());
        }
    }
}