container_image_ref/
lib.rs

1use std::{fmt::Display, str::FromStr};
2
3use nom::{
4    bytes::complete::{tag, take_till, take_till1, take_while1},
5    character::complete::{alphanumeric1, anychar},
6    combinator::{opt, rest},
7    error::{context, Error, VerboseError},
8    sequence::{preceded, separated_pair},
9    AsChar, IResult,
10};
11use serde::{
12    de::{Deserialize, Deserializer, Error as DeserializeError},
13    ser::{Serialize, Serializer},
14};
15use thiserror::Error;
16
17// This is taken from:
18// https://github.com/distribution/distribution/blob/a4d9db5a884b70be0c96dd6a7a9dbef4f2798c51/reference/reference.go#L4
19//
20// TODO:
// Not all rules of the grammar below are checked yet. Validation here is best-effort and
// will be tightened gradually.
23//
24// TODO:
25// In the future, we also want to support image reference using:
26// https://github.com/containers/image/blob/main/docs/containers-transports.5.md
27//
28// Grammar
29//
30// 	reference                       := name [ ":" tag ] [ "@" digest ]
31// 	name                            := [domain '/'] path-component ['/' path-component]*
32// 	domain                          := domain-component ['.' domain-component]* [':' port-number]
33// 	domain-component                := /([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])/
34// 	port-number                     := /[0-9]+/
35// 	path-component                  := alpha-numeric [separator alpha-numeric]*
36// 	alpha-numeric                   := /[a-z0-9]+/
37// 	separator                       := /[_.]|__|[-]*/
38//
39// 	tag                             := /[\w][\w.-]{0,127}/
40//
41// 	digest                          := digest-algorithm ":" digest-hex
// 	digest-algorithm                := digest-algorithm-component [ digest-algorithm-separator digest-algorithm-component ]*
// 	digest-algorithm-separator      := /[+.-_]/
44// 	digest-algorithm-component      := /[A-Za-z][A-Za-z0-9]*/
45// 	digest-hex                      := /[0-9a-fA-F]{32,}/ ; At least 128 bit digest value
46//
47// 	identifier                      := /[a-f0-9]{64}/
48// 	short-identifier                := /[a-f0-9]{6,64}/
49
/// Upper bound on the length of a repository name (domain plus path), enforced by `parse`.
const NAME_TOTAL_LENGTH_MAX: usize = 255;
/// Upper bound on the length of a tag *after* its first character, matching the
/// `{0,127}` repetition in the tag grammar; `validate_tags` compares the remainder of the
/// tag against this value.
const TAG_TOTAL_LENGTH_MAX: usize = 127;
52
53type Res<T, U> = IResult<T, U, VerboseError<T>>;
54
55/// A container image reference
56#[derive(Debug, Clone, Eq)]
57pub struct ImageReference {
58    pub domain: String,
59    pub path: String,
60    pub tag: Option<String>,
61    pub digest: Option<String>,
62}
63
64impl ImageReference {
65    /// This equality check requires all fields to match exactly with `other`. Unlike in the
66    /// `PartialEq` implementation where a value of `None` for either the `tag` or `digest` will be
67    /// treated as a wild card and can match any value in `other` for the respective field.
68    pub fn eq_strict(&self, other: &Self) -> bool {
69        self.domain == other.domain
70            && self.path == other.path
71            && self.tag == other.tag
72            && self.digest == other.digest
73    }
74
75    pub fn domain(&self) -> &str { &self.domain }
76
77    pub fn path(&self) -> &str { &self.path }
78
79    /// Add the given digest to this image reference, replacing any digest that currently exists
80    pub fn with_digest<S: Into<String>>(self, digest: S) -> Self {
81        Self { digest: Some(digest.into()), ..self }
82    }
83}
84
85impl PartialEq for ImageReference {
86    /// Compares equality but allows a value of `None` for either the `tag` or `digest` to be
87    /// treated as a wild card and will therefore match any value in `other` for the respective
88    /// field.
89    fn eq(&self, other: &Self) -> bool {
90        let tag_match = match (self.tag.as_ref(), other.tag.as_ref()) {
91            (Some(a), Some(b)) => a == b,
92            _ => true,
93        };
94        let digest_match = match (self.digest.as_ref(), other.digest.as_ref()) {
95            (Some(a), Some(b)) => a == b,
96            _ => true,
97        };
98        self.domain == other.domain && self.path == other.path && tag_match && digest_match
99    }
100}
101
102impl Display for ImageReference {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        write!(f, "{}/{}", self.domain, self.path)?;
105        if let Some(tag) = self.tag.as_ref() {
106            write!(f, ":{tag}")?;
107        }
108
109        if let Some(digest) = self.digest.as_ref() {
110            write!(f, "@{digest}")?;
111        }
112
113        Ok(())
114    }
115}
116
117impl FromStr for ImageReference {
118    type Err = ImageReferenceError;
119
120    fn from_str(s: &str) -> Result<Self, Self::Err> { parse(s) }
121}
122
123impl Serialize for ImageReference {
124    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
125        self.to_string().serialize(serializer)
126    }
127}
128
129impl<'de> Deserialize<'de> for ImageReference {
130    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
131        let s = <&str>::deserialize(deserializer)?;
132        s.parse().map_err(D::Error::custom)
133    }
134}
135
136pub fn parse(input: &str) -> Result<ImageReference, ImageReferenceError> {
137    let reference = match parse_reference(input) {
138        Ok((residual, reference)) => {
139            if !residual.is_empty() {
140                return Err(ImageReferenceError::ErrReferenceInvalidFormat);
141            }
142
143            reference
144        }
145        Err(_) => {
146            return Err(ImageReferenceError::ErrReferenceInvalidFormat);
147        }
148    };
149
150    // TODO: Consider how to handle invalid domain. In docker, a name with
151    // invalid domain is considered to be docker.io/name. Do we want to do the
152    // same? For now, error out and requires user to input a valid domain name.
153    validate_domain(&reference.domain)?;
154
155    if reference.domain.len() + reference.path.len() > NAME_TOTAL_LENGTH_MAX {
156        return Err(ImageReferenceError::ErrNameTooLong);
157    }
158
159    if let Some(tag) = reference.tag.as_ref() {
160        validate_tags(tag)?;
161    }
162
163    if let Some(digest) = reference.digest.as_ref() {
164        validate_digest(digest)?;
165    }
166
167    Ok(reference)
168}
169
170pub fn parse_reference(input: &str) -> Res<&str, ImageReference> {
171    let (input, name) = parse_name(input)?;
172    let (input, tag) = parse_tags(input)?;
173    let (residual, digest) = parse_digest(input)?;
174    let (_, (domain, path)) = split_domain(name)?;
175
176    Ok((
177        residual,
178        ImageReference {
179            domain: domain.to_string(),
180            path: path.to_string(),
181            tag: tag.map(|s| s.to_string()),
182            digest: digest.map(|s| s.to_string()),
183        },
184    ))
185}
186
187fn parse_name(input: &str) -> Res<&str, &str> {
188    context("parse_name", take_till1(|c| (c == ':' || c == '@')))(input)
189}
190
191// Split name into domain and path. Domain is the first component delimited by a
192// '/'.  For example, name = domain/path1/path2/path3.
193fn split_domain(input: &str) -> Res<&str, (&str, &str)> {
194    context("split_domain", separated_pair(take_till(|c| c == '/'), tag("/"), rest))(input)
195}
196
197fn parse_tags(input: &str) -> Res<&str, Option<&str>> {
198    context("parse_tags", opt(preceded(tag(":"), take_till1(|c| c == '@'))))(input)
199}
200
201fn validate_tags(input: &str) -> Result<(), ImageReferenceError> {
202    let input: &str = match anychar::<_, Error<_>>(input) {
203        Ok((rest, c)) => {
204            if !c.is_alphanumeric() && c != '_' {
205                return Err(ImageReferenceError::ErrTagInvalidFormat(input.to_string()));
206            }
207
208            rest
209        }
210        Err(_) => {
211            return Err(ImageReferenceError::ErrTagInvalidFormat(input.to_string()));
212        }
213    };
214
215    if input.len() > TAG_TOTAL_LENGTH_MAX {
216        return Err(ImageReferenceError::ErrTagInvalidFormat(input.to_string()));
217    }
218
219    if !input
220        .chars()
221        .all(|c: char| c.is_alphanum() || c == '.' || c == '-' || c == '_')
222    {
223        return Err(ImageReferenceError::ErrTagInvalidFormat(input.to_string()));
224    }
225
226    Ok(())
227}
228
229fn parse_digest(input: &str) -> Res<&str, Option<&str>> {
230    context("parse_tags", opt(preceded(tag("@"), rest)))(input)
231}
232
233fn validate_digest(input: &str) -> Result<(), ImageReferenceError> {
234    let parse_hex = take_while1::<_, _, Error<_>>(|c: char| c.is_hex_digit());
235    let (input, (_protocol, digest_hex)) =
236        match separated_pair(alphanumeric1, tag(":"), parse_hex)(input) {
237            Ok((rest, (protocol, digest_hex))) => (rest, (protocol, digest_hex)),
238            Err(_) => {
239                return Err(ImageReferenceError::ErrDigestInvalidFormat(input.to_string()));
240            }
241        };
242    if !input.is_empty() {
243        return Err(ImageReferenceError::ErrDigestInvalidFormat(input.to_string()));
244    }
245
246    if !digest_hex.chars().all(|c| c.is_hex_digit()) {
247        return Err(ImageReferenceError::ErrDigestInvalidFormat(input.to_string()));
248    }
249
250    Ok(())
251}
252
253fn validate_domain(input: &str) -> Result<(), ImageReferenceError> {
254    // Check if domain containers a `.` or a `:` or the domain is exactly `localhost`.
255    if !input.chars().any(|c| c == '.' || c == ':') && input != "localhost" {
256        return Err(ImageReferenceError::ErrDomainInvalidFormat(input.to_string()));
257    }
258
259    Ok(())
260}
261
262#[derive(Debug, Error, Eq, PartialEq)]
263pub enum ImageReferenceError {
264    #[error("invalid reference format")]
265    ErrReferenceInvalidFormat,
266
267    #[error("invalid domain format: `{0}`")]
268    ErrDomainInvalidFormat(String),
269
270    #[error("invalid tag format: `{0}`")]
271    ErrTagInvalidFormat(String),
272
273    #[error("invalid digest format: `{0}`")]
274    ErrDigestInvalidFormat(String),
275
276    #[error("repository name must not be more than {} characters", NAME_TOTAL_LENGTH_MAX)]
277    ErrNameTooLong,
278
279    #[error("repository name must be lower case")]
280    ErrNameContainsUppercase,
281
282    #[error("repository name must not be empty")]
283    ErrNameEmpty,
284
285    #[error("repository name must be canonical")]
286    ErrNameNotCanonical,
287}
288
#[cfg(test)]
mod tests {

    use super::*;

    // Digests shared by several tests.
    const BUSYBOX_DIGEST: &str =
        "sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa";
    const NGINX_DIGEST_83D4: &str =
        "sha256:83d487b625d8c7818044c04f1b48aabccd3f51c3341fc300926846bca0c439e6";
    const NGINX_DIGEST_AAAA: &str =
        "sha256:aaaaa7b625d8c7818044c04f1b48aabccd3f51c3341fc300926846bca0c439e6";

    /// A digest-only reference parses into its four components.
    #[test]
    fn it_works() {
        let expected = ImageReference {
            domain: "docker.io".to_string(),
            path: "library/busybox".to_string(),
            tag: None,
            digest: Some(BUSYBOX_DIGEST.to_string()),
        };

        let parsed = parse(&format!("docker.io/library/busybox@{BUSYBOX_DIGEST}"));

        assert_eq!(parsed, Ok(expected));
    }

    /// Formatting a parsed reference reproduces the original input.
    #[test]
    fn test_display() {
        let input = format!("docker.io/library/busybox:latest@{BUSYBOX_DIGEST}");
        let reference = parse(&input).expect("failed to parse input");
        let output = reference.to_string();
        assert_eq!(input, output);
    }

    /// The name stops right before the tag or digest separator.
    #[test]
    fn test_parse_name() {
        let name = "registry.hub.docker.com/seaplane/busybox";

        assert_eq!(
            parse_name("registry.hub.docker.com/seaplane/busybox:latest"),
            Ok((":latest", name))
        );
        assert_eq!(
            parse_name("registry.hub.docker.com/seaplane/busybox@sha256:XXX"),
            Ok(("@sha256:XXX", name))
        );
    }

    #[test]
    fn test_parse_tag() {
        // Tag only.
        assert_eq!(parse_tags(":latest"), Ok(("", Some("latest"))));
        // Tag followed by a digest: the digest is left untouched.
        assert_eq!(parse_tags(":latest@sha256:XXX"), Ok(("@sha256:XXX", Some("latest"))));
        // Digest only: nothing is consumed.
        assert_eq!(parse_tags("@sha256:XXX"), Ok(("@sha256:XXX", None)));
        // Neither tag nor digest: nothing is consumed.
        assert_eq!(parse_tags("registry.in"), Ok(("registry.in", None)));
    }

    #[test]
    fn test_parse_digest() {
        assert_eq!(parse_digest("@sha256:XXX"), Ok(("", Some("sha256:XXX"))));
        assert_eq!(parse_digest("registry.in"), Ok(("registry.in", None)));
        assert_eq!(parse_digest(":latest@sha256:XXX"), Ok((":latest@sha256:XXX", None)));
    }

    #[test]
    fn test_validate_tags() {
        for accepted in ["v1.0", "v1-0", "1-0", "1.0"] {
            assert!(validate_tags(accepted).is_ok());
        }

        for rejected in [".--..)()00", ".V100)()00", "]-g90)()00"] {
            assert!(validate_tags(rejected).is_err());
        }
        assert!(validate_tags(&"x".repeat(TAG_TOTAL_LENGTH_MAX + 10)).is_err());
    }

    #[test]
    fn test_validate_digest() {
        assert!(validate_digest(BUSYBOX_DIGEST).is_ok());
        // Non-hex characters in the middle of the digest.
        assert!(validate_digest(
            "sha256:7cc4b5aefd1d0cadf8d97d435046wwwwwww2ba51c694ebca145b08d7d41b41acc8db5aa"
        )
        .is_err());
        // Wrong separator between algorithm and digest.
        assert!(validate_digest(
            "sha256*7cc4b5aefd1d0cadf8d97d435046wwwwwww2ba51c694ebca145b08d7d41b41acc8db5aa"
        )
        .is_err());
        // Missing hex part.
        assert!(validate_digest("sha256:").is_err());
    }

    #[test]
    fn test_split_domain() {
        assert_eq!(split_domain("domain/path1/path2"), Ok(("", ("domain", "path1/path2"))));
    }

    #[test]
    fn test_validate_domain() {
        assert_eq!(
            parse("seaplane/busybox:latest"),
            Err(ImageReferenceError::ErrDomainInvalidFormat("seaplane".to_string()))
        );

        for domain in ["docker.io", "registry.hub.docker.com", "localhost", "localhost:80"] {
            assert_eq!(validate_domain(domain), Ok(()));
        }
    }

    /// `==` treats a missing tag or digest as a wild card but still distinguishes values that
    /// are present on both sides.
    #[test]
    fn partial_eq() {
        let by_digest_83d4 = parse(&format!("domain.io/nginx@{NGINX_DIGEST_83D4}"));

        assert_eq!(parse("domain.io/nginx:latest"), by_digest_83d4);
        assert_eq!(parse("domain.io/nginx:latest"), parse("domain.io/nginx"));
        assert_ne!(parse("domain.io/nginx:latest"), parse("domain.io/nginx:buster"));
        assert_ne!(parse(&format!("domain.io/nginx@{NGINX_DIGEST_AAAA}")), by_digest_83d4);
        assert_ne!(
            parse(&format!("domain.io/nginx:latest@{NGINX_DIGEST_AAAA}")),
            by_digest_83d4
        );
        assert_ne!(
            parse(&format!("domain.io/nginx:latest@{NGINX_DIGEST_83D4}")),
            parse(&format!("domain.io/nginx:slim@{NGINX_DIGEST_83D4}"))
        );
    }
}