docker_image_reference/
lib.rs

1use anyhow::{bail, Result};
2use const_format::concatcp;
3use lazy_static::lazy_static;
4use regex::Regex;
5use std::fmt;
6
7// Grammar -- from https://github.com/distribution/distribution/blob/v2.7.1/reference/reference.go
8//
9// reference        := name [ ":" tag ] [ "@" digest ]
10// name             := [domain '/'] path-component ['/' path-component]*
11// domain           := domain-component ['.' domain-component]* [':' port-number]
12// domain-component := /([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])/
13// port-number      := /[0-9]+/
14// path-component   := alpha-numeric [separator alpha-numeric]*
15// alpha-numeric    := /[a-z0-9]+/
16// separator        := /[_.]|__|[-]*/
17//
18// tag              := /[\w][\w.-]{0,127}/
19//
20// digest                     := digest-algorithm ":" digest-hex
21// digest-algorithm           := digest-algorithm-component [ digest-algorithm-separator digest-algorithm-component ]*
22// digest-algorithm-separator := /[+.-_]/
23// digest-algorithm-component := /[A-Za-z][A-Za-z0-9]*/
24// digest-hex                 := /[0-9a-fA-F]{32,}/ ; At least 128 bit digest value
25
26const NAME: &str = concatcp!("^(", DOMAIN, "/)?", PATH_COMPONENT, "(/", PATH_COMPONENT, ")*");
27const DOMAIN: &str = concatcp!(DOMAIN_COMPONENT, r#"(\."#, DOMAIN_COMPONENT, ")*(:", PORT_NUMBER, ")?");
28const DOMAIN_COMPONENT: &str = r#"([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])"#;
29const PORT_NUMBER: &str = r#"([0-9]+)"#;
30const PATH_COMPONENT: &str = concatcp!(ALPHA_NUMERIC, "(", SEPARATOR, ALPHA_NUMERIC, ")*");
31const ALPHA_NUMERIC: &str = r#"([a-z0-9]+)"#;
32const SEPARATOR: &str = r#"([_\.]|__|[-]*)"#;
33
34const COLON_TAG: &str = concatcp!("^:", TAG);
35const TAG: &str = r#"([0-9A-Za-z_][0-9A-Za-z_\.-]{0,127})"#;
36
37const AT_DIGEST: &str = concatcp!("^@", DIGEST);
38const DIGEST: &str = concatcp!(DIGEST_ALGORITHM, ":", DIGEST_HEX);
39const DIGEST_ALGORITHM: &str = concatcp!(
40    DIGEST_ALGORITHM_COMPONENT,
41    "(",
42    DIGEST_ALGORITHM_SEPARATOR,
43    DIGEST_ALGORITHM_COMPONENT,
44    ")*"
45);
46const DIGEST_ALGORITHM_SEPARATOR: &str = r#"([\+\.-_])"#;
47const DIGEST_ALGORITHM_COMPONENT: &str = r#"([A-Za-z][A-Za-z0-9]*)"#;
48const DIGEST_HEX: &str = r#"([0-9a-fA-F]{32,})"#;
49
50lazy_static! {
51    static ref NAME_REGEX: Regex = Regex::new(NAME).unwrap();
52    static ref COLON_TAG_REGEX: Regex = Regex::new(COLON_TAG).unwrap();
53    static ref AT_DIGEST_REGEX: Regex = Regex::new(AT_DIGEST).unwrap();
54}
55
56/// Similar to regular FromStr but returns unused trailing characters.
57trait FromStrExtended<'a>: Sized {
58    fn from_str_ext(s: &'a str) -> Result<(Self, &'a str)>;
59}
60
61/// A reference to a docker image, e.g. `ubuntu:20.04` or `localhost:5000/example:1.0-dev`.
62///
63/// [`Reference::from_str()`] can be used to parse an image reference following the grammar
64/// specified in <https://github.com/distribution/distribution/blob/v2.7.1/reference/reference.go>.
65///
66/// In short, a reference is of the form `name [':' tag] ['@' digest]`, where `tag` and `digest`
67/// parts are optional. More information on the grammar can be found in the link above.
68///
69/// Note that no semantic check is performed, e.g. whether the port number is too long, etc.
70/// However it should be able to correctly parse the `name`, `tag` and `digest` components of a
71/// reference.
72///
73/// [`Reference::from_str()`]: #method.from_str
74#[derive(PartialEq)]
75pub struct Reference<'r> {
76    name: &'r str,
77    tag: Option<&'r str>,
78    digest: Option<Digest<'r>>,
79}
80
81impl<'r> Reference<'r> {
82    /// Parse a reference string.
83    ///
84    /// For example:
85    /// ```
86    /// use docker_image_reference::Reference;
87    /// let r = Reference::from_str("ubuntu:20.04").unwrap();
88    /// assert_eq!(r.name(), "ubuntu");
89    /// assert_eq!(r.tag(), Some("20.04"));
90    /// ```
91    pub fn from_str(s: &'r str) -> Result<Self> {
92        Reference::from_str_ext(s).map(|(r, _)| r)
93    }
94
95    /// Get the name component of the reference. This might start with a `host[:port]` part
96    /// followed by one or more path components separated by slash.
97    ///
98    /// For example:
99    /// ```
100    /// use docker_image_reference::Reference;
101    /// let r = Reference::from_str("index.docker.io/library/ubuntu:latest").unwrap();
102    /// assert_eq!(r.name(), "index.docker.io/library/ubuntu");
103    /// ```
104    pub fn name(&self) -> &'r str {
105        self.name
106    }
107
108    /// Get the tag component if present. This is a sequence of up to 128 alphanumerics,
109    /// `-`, `.` and `_` not starting with `.` or `-`.
110    ///
111    /// For example:
112    /// ```
113    /// use docker_image_reference::Reference;
114    /// let r = Reference::from_str("example:1.2.3-dev_test").unwrap();
115    /// assert_eq!(r.tag(), Some("1.2.3-dev_test"));
116    /// ```
117    pub fn tag(&self) -> Option<&'r str> {
118        self.tag
119    }
120
121    /// Returns true if the reference contains a digest component.
122    /// If this function returns true, then both [`digest_algorithm()`] and [`digest_hex()`]
123    /// will return `Some`.
124    ///
125    /// For example:
126    /// ```
127    /// use docker_image_reference::Reference;
128    /// let r = Reference::from_str("image-name@sha256:9d78ad0da0e88ca15da5735b9f70064d3099ac0a8cd9dc839795789400a38e42").unwrap();
129    /// assert!(r.has_digest());
130    /// assert_eq!(r.digest_algorithm(), Some("sha256"));
131    /// assert_eq!(r.digest_hex(), Some("9d78ad0da0e88ca15da5735b9f70064d3099ac0a8cd9dc839795789400a38e42"));
132    /// ```
133    ///
134    /// [`digest_algorithm()`]: #method.digest_algorithm
135    /// [`digest_hex()`]: #method.digest_hex
136    pub fn has_digest(&self) -> bool {
137        self.digest.is_some()
138    }
139
140    pub fn digest_algorithm(&self) -> Option<&'r str> {
141        self.digest.as_ref().map(|d| d.algorithm)
142    }
143
144    pub fn digest_hex(&self) -> Option<&'r str> {
145        self.digest.as_ref().map(|d| d.digest_hex)
146    }
147}
148
149impl<'a> FromStrExtended<'a> for Reference<'a> {
150    fn from_str_ext(s: &'a str) -> Result<(Self, &'a str)> {
151        let (name, s) = match NAME_REGEX.find(s) {
152            Some(m) => (m.as_str(), &s[m.end()..]),
153            None => bail!("no name found in `{}`", s),
154        };
155        let (tag, s) = match s.chars().next() {
156            Some(':') => {
157                let (tag, s) = Tag::from_str_ext(s)?;
158                (Some(tag.0), s)
159            }
160            _ => (None, s),
161        };
162        let (digest, s) = match s.chars().next() {
163            Some('@') => {
164                let (digest, s) = Digest::from_str_ext(s)?;
165                (Some(digest), s)
166            }
167            _ => (None, s),
168        };
169        if s != "" {
170            bail!("unrecognized trailing characters: `{}`", s);
171        }
172        Ok((Reference { name, tag, digest }, ""))
173    }
174}
175
176impl<'r> fmt::Display for Reference<'r> {
177    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
178        write!(f, "{}", self.name)?;
179        if let Some(ref tag) = self.tag {
180            write!(f, ":{}", tag)?;
181        }
182        if let Some(ref digest) = self.digest {
183            write!(f, "@{}:{}", digest.algorithm, digest.digest_hex)?;
184        }
185        Ok(())
186    }
187}
188
189struct Tag<'r>(&'r str);
190
191impl<'a> FromStrExtended<'a> for Tag<'a> {
192    fn from_str_ext(s: &'a str) -> Result<(Self, &'a str)> {
193        let (colon_tag, s) = match COLON_TAG_REGEX.find(s) {
194            Some(m) => (m.as_str(), &s[m.end()..]),
195            None => bail!("no tag found in `{}`", s),
196        };
197        let tag = colon_tag.strip_prefix(":").expect("colon at the begining");
198        Ok((Tag(tag), s))
199    }
200}
201
202#[derive(PartialEq)]
203struct Digest<'r> {
204    algorithm: &'r str,
205    digest_hex: &'r str,
206}
207
208impl<'a> FromStrExtended<'a> for Digest<'a> {
209    fn from_str_ext(s: &'a str) -> Result<(Self, &'a str)> {
210        let (at_digest, s) = match AT_DIGEST_REGEX.find(s) {
211            Some(m) => (m.as_str(), &s[m.end()..]),
212            None => bail!("no digest found in `{}`", s),
213        };
214        let mut split = at_digest.strip_prefix("@").expect("@ at the begining").split(":");
215        let (algorithm, digest_hex) = match (split.next(), split.next()) {
216            (Some(algorithm), Some(digest_hex)) => (algorithm, digest_hex),
217            _ => unreachable!(),
218        };
219        Ok((Digest { algorithm, digest_hex }, s))
220    }
221}
222
#[cfg(test)]
mod tests {
    use super::*;
    use std::fmt::Display;

    /// One table-driven parser case: an input and either the expected value plus unused tail,
    /// or the expected error message.
    struct Test<'t, T> {
        input: &'t str,
        want: Result<(T, &'t str), String>,
    }

    impl<'t, T: FromStrExtended<'t> + PartialEq + Display> Test<'t, T> {
        /// Parse `input` and compare against `want`, returning a description of every
        /// mismatch (including the actual error or value) on failure.
        fn run(self) -> Result<(), String> {
            let res = T::from_str_ext(self.input);
            match (self.want, res) {
                (Ok((expected_value, expected_unused)), Ok((value, unused))) => {
                    let mut err = String::new();
                    if value != expected_value {
                        err.push_str(&format!("expected value `{}` got `{}`. ", expected_value, value));
                    }
                    if unused != expected_unused {
                        err.push_str(&format!("expected unused `{}` got `{}`.", expected_unused, unused));
                    }
                    if err.is_empty() {
                        Ok(())
                    } else {
                        Err(err)
                    }
                }
                (Err(expected_err), Err(err)) => {
                    if expected_err == err.to_string() {
                        Ok(())
                    } else {
                        Err(format!("expected error `{}`, got different error `{}`", expected_err, err))
                    }
                }
                // Surface what actually happened so a failing case can be diagnosed from the
                // test output alone.
                (Ok(_), Err(err)) => Err(format!("expected ok, got error `{}`", err)),
                (Err(expected_err), Ok((value, _))) => {
                    Err(format!("expected error `{}`, got ok `{}`", expected_err, value))
                }
            }
        }
    }

    #[test]
    fn reference_grammar() {
        let tests = vec![
            Test {
                input: "ubuntu:16.04",
                want: Ok((
                    Reference {
                        name: "ubuntu",
                        tag: Some("16.04"),
                        digest: None,
                    },
                    "",
                )),
            },
            Test {
                input: "example.com/user-name/ubuntu:16.04-lts",
                want: Ok((
                    Reference {
                        name: "example.com/user-name/ubuntu",
                        tag: Some("16.04-lts"),
                        digest: None,
                    },
                    "",
                )),
            },
            Test {
                input: "example.com:8080/user-name/ubuntu:16.04-lts",
                want: Ok((
                    Reference {
                        name: "example.com:8080/user-name/ubuntu",
                        tag: Some("16.04-lts"),
                        digest: None,
                    },
                    "",
                )),
            },
            Test {
                input: "example.com:8080/user-name/ubuntu",
                want: Ok((
                    Reference {
                        name: "example.com:8080/user-name/ubuntu",
                        tag: None,
                        digest: None,
                    },
                    "",
                )),
            },
            Test {
                input: "ubuntu@sha256:9d78ad0da0e88ca15da5735b9f70064d3099ac0a8cd9dc839795789400a38e42",
                want: Ok((
                    Reference {
                        name: "ubuntu",
                        tag: None,
                        digest: Some(Digest {
                            algorithm: "sha256",
                            digest_hex: "9d78ad0da0e88ca15da5735b9f70064d3099ac0a8cd9dc839795789400a38e42",
                        }),
                    },
                    "",
                )),
            },
            Test {
                input: "example.com:8080/user___name/ubuntu",
                want: Err("unrecognized trailing characters: `___name/ubuntu`".to_owned()),
            },
            Test {
                input: "example.com:8080/user-name/ubuntu:φ",
                want: Err("no tag found in `:φ`".to_owned()),
            },
            Test {
                input: "example.com:8080/user-name/ubuntu@φ",
                want: Err("no digest found in `@φ`".to_owned()),
            },
            Test {
                input: "αβγδ",
                want: Err("no name found in `αβγδ`".to_owned()),
            },
        ];

        for t in tests {
            // Remember the input before `run` consumes the case, so failures name it.
            let input = t.input;
            if let Err(e) = t.run() {
                panic!("input `{}`: {}", input, e);
            }
        }
    }

    #[test]
    fn public_api() {
        let r = Reference::from_str("user/image:tag").unwrap();
        assert_eq!(r.name(), "user/image");
        assert_eq!(r.tag(), Some("tag"));
        assert_eq!(r.digest_algorithm(), None);
        assert_eq!(r.digest_hex(), None);
        assert_eq!(r.to_string(), "user/image:tag".to_owned());

        let r = Reference::from_str("user/image:1.2.3-abc@sha256:9d78ad0da0e88ca15da5735b9f70064d3099ac0a8cd9dc839795789400a38e42").unwrap();
        assert_eq!(r.name(), "user/image");
        assert_eq!(r.tag(), Some("1.2.3-abc"));
        assert_eq!(r.digest_algorithm(), Some("sha256"));
        assert_eq!(r.digest_hex(), Some("9d78ad0da0e88ca15da5735b9f70064d3099ac0a8cd9dc839795789400a38e42"));
        assert_eq!(
            r.to_string(),
            "user/image:1.2.3-abc@sha256:9d78ad0da0e88ca15da5735b9f70064d3099ac0a8cd9dc839795789400a38e42".to_owned()
        );
    }
}