buildkit_rs_reference/
reference.rs

1use crate::{
2    consts::{DEFAULT_DOMAIN, LEGACY_DEFAULT_DOMAIN, NAME_TOTAL_LENGTH_MAX, OFFICIAL_REPO_PREFIX},
3    regex::{ANCHORED_IDENTIFIER_REGEXP, ANCHORED_NAME_REGEXP, REFERENCE_REGEX},
4    Error,
5};
6use std::{borrow::Cow, cmp::Ordering, fmt};
7
/// The name portion of an image reference: everything that comes before the tag or digest.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Repository {
    /// Registry domain of the repository (e.g. `docker.io`), if one was given
    pub domain: Option<String>,
    /// Path of the repository below the domain (e.g. `library/alpine`), if one was given
    pub path: Option<String>,
}
16
17impl fmt::Display for Repository {
18    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
19        write!(f, "{}", self.domain_or_default())?;
20
21        if let Some(path) = self.normalized_path() {
22            write!(f, "/{path}")?;
23        }
24
25        Ok(())
26    }
27}
28
29impl Repository {
30    /// Returns the domain of the repository, or the default domain (`docker.io`) if it is not set
31    pub fn domain_or_default(&self) -> &str {
32        self.domain.as_deref().unwrap_or(DEFAULT_DOMAIN)
33    }
34
35    /// Normalizes the path of the repository if it is an official repository
36    ///
37    /// (i.e. `library/foo` -> `foo`)
38    pub fn normalized_path(&self) -> Option<Cow<str>> {
39        let path = self.path.as_deref()?;
40        if matches!(
41            self.domain.as_deref(),
42            None | Some(DEFAULT_DOMAIN) | Some(LEGACY_DEFAULT_DOMAIN)
43        ) && !path.contains('/')
44        {
45            Some(format!("{OFFICIAL_REPO_PREFIX}{}", path).into())
46        } else {
47            Some(path.into())
48        }
49    }
50}
51
52/// A reference is a named reference to an image
53///
54/// Examples:
55/// - `docker.io/library/alpine`
56/// - `docker.io/library/alpine:latest`
57/// - `docker.io/library/alpine@sha256:86e0e091d0da6bde2456dbb48306f3956bbeb2eae1b5b9a43045843f69fe4aaa`
58///
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub struct Reference {
61    /// The repository of the reference, (e.g. `docker.io/library/alpine`)
62    pub repository: Repository,
63    /// The tag of the reference, (e.g. `latest`)
64    pub tag: Option<String>,
65    /// The digest of the reference, (e.g. `sha256:86e0e091d0da6bde2456dbb48306f3956bbeb2eae1b5b9a43045843f69fe4aaa`)
66    pub digest: Option<String>,
67}
68
69impl fmt::Display for Reference {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        write!(f, "{}", self.repository)?;
72
73        if let Some(tag) = &self.tag {
74            write!(f, ":{tag}")?;
75        }
76
77        if let Some(digest) = &self.digest {
78            write!(f, "@{digest}")?;
79        }
80
81        Ok(())
82    }
83}
84
85impl Reference {
86    /// Parse parses `s` and returns a syntactically valid Reference
87    pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
88        let s = s.as_ref();
89
90        // TODO: This maybe should use a real parser instead of regexes, the regex is REALLY BIG
91        // and makes a 60Mb allocationthis was copied from the original go code, but we can do better :)
92        let matches = REFERENCE_REGEX.captures(s);
93        let Some(matches) = matches else {
94            if s == "" {
95                return Err(Error::NameEmpty);
96            }
97            if REFERENCE_REGEX.captures(&s.to_lowercase()).is_some() {
98                return Err(Error::NameContainsUppercase)
99            }
100            return Err(Error::InvalidReferenceFormat)
101        };
102
103        if matches.get(0).unwrap().as_str().len() > NAME_TOTAL_LENGTH_MAX {
104            return Err(Error::NameTooLong);
105        }
106
107        let name_match = ANCHORED_NAME_REGEXP.captures(
108            matches
109                .get(1)
110                .ok_or_else(|| Error::InvalidReferenceFormat)?
111                .as_str(),
112        );
113        let Some(name_match) = name_match else {
114            return Err(Error::InvalidReferenceFormat)
115        };
116
117        let repo = match name_match.get(1) {
118            Some(domain) => Repository {
119                domain: Some(domain.as_str().into()),
120                path: name_match.get(2).map(|m| m.as_str().into()),
121            },
122            None => Repository {
123                domain: None,
124                path: name_match.get(2).map(|m| m.as_str().into()).into(),
125            },
126        };
127
128        let tag = matches.get(2).map(|m| m.as_str().to_owned());
129
130        // TODO: Actually validate the digest
131        let digest = matches.get(3).map(|m| m.as_str().to_owned());
132
133        Ok(Reference {
134            repository: repo,
135            tag,
136            digest,
137        })
138    }
139
140    /// Parses a string into a named reference transforming a familiar name from Docker UI
141    /// to a fully qualified reference
142    pub fn parse_normalized_named(s: &str) -> Result<Self, Error> {
143        if ANCHORED_IDENTIFIER_REGEXP.is_match(s) {
144            return Err(Error::NameIdentifier);
145        }
146
147        let (domain, remainder) = split_docker_domain(s);
148        let remote = remainder
149            .find(':')
150            .map(|i| &remainder[..i])
151            .unwrap_or(&remainder);
152
153        if remote.contains(|c: char| c.is_uppercase()) {
154            return Err(Error::NameContainsUppercase);
155        }
156
157        Self::parse(&format!("{domain}/{remainder}"))
158    }
159
160    /// Returns the domain of the reference, or the default domain (`docker.io`) if it is not set
161    pub fn domain(&self) -> &str {
162        self.repository.domain_or_default()
163    }
164
165    /// Returns the path of the reference, normalized if it is an official repository
166    pub fn path(&self) -> Option<Cow<str>> {
167        self.repository.normalized_path()
168    }
169
170    /// The tag of the reference, or `None` if it is not set
171    pub fn tag(&self) -> Option<&str> {
172        self.tag.as_deref()
173    }
174
175    /// The digest of the reference, or `None` if it is not set
176    pub fn digest(&self) -> Option<&str> {
177        self.digest.as_deref()
178    }
179
180    /// `rank_ord` returns a [Ordering] based on the following rules preferring higher
181    /// information references, then by the lexicographical ordering of the reference string:
182    ///
183    /// | Rule | Example |
184    /// |------|---------|
185    /// | [Named] + [Tagged] + [Digested] | `docker.io/library/busybox:latest@sha256:<digest>` |
186    /// | [Named] + [Tagged]              | `docker.io/library/busybox:latest` |
187    /// | [Named] + [Digested]            | `docker.io/library/busybo@sha256:<digest>` |
188    /// | [Named]                         | `docker.io/library/busybox` |
189    /// | [Digested]                      | `docker.io@sha256:<digest>` |
190    /// | Error                           | The reference is not valid due to not matching any of the above rules |
191    ///
192    /// [Original](https://github.com/distribution/distribution/blob/e5d5810851d1f17a5070e9b6f940d8af98ea3c29/reference/sort.go)
193    pub fn rank_ord(&self, other: &Self) -> Ordering {
194        let get_order = |r: &Reference| {
195            if r.repository.path.is_some() {
196                if r.tag.is_some() {
197                    if r.digest.is_some() {
198                        1
199                    } else {
200                        2
201                    }
202                } else if r.digest.is_some() {
203                    3
204                } else {
205                    4
206                }
207            } else if r.digest.is_some() {
208                5
209            } else {
210                6
211            }
212        };
213
214        let order = get_order(self);
215        let other_order = get_order(other);
216
217        if order == other_order {
218            // Convert to a string and compare
219            self.to_string().cmp(&other.to_string())
220        } else {
221            order.cmp(&other_order)
222        }
223    }
224}
225
226/// splitDockerDomain splits a repository name to domain and remote-name.
227/// If no valid domain is found, the default domain is used. Repository name
228/// needs to be already validated before.
229///
230/// https://github.com/distribution/distribution/blob/e5d5810851d1f17a5070e9b6f940d8af98ea3c29/reference/normalize.go#L126
231fn split_docker_domain<'a>(name: &'a str) -> (&'a str, Cow<'a, str>) {
232    let mut domain: &str;
233    let mut remainder: Cow<'a, str>;
234
235    if let Some(i) = name.find('/') {
236        if !name[..i].chars().any(|c| c == '.' || c == ':')
237            && &name[..i] != "localhost"
238            && name[..i].to_lowercase() == name[..i]
239        {
240            domain = DEFAULT_DOMAIN;
241            remainder = name.into();
242        } else {
243            domain = &name[..i];
244            remainder = (&name[i + 1..]).into();
245        }
246    } else {
247        domain = DEFAULT_DOMAIN;
248        remainder = name.into();
249    }
250
251    if domain == LEGACY_DEFAULT_DOMAIN {
252        domain = DEFAULT_DOMAIN;
253    }
254
255    if domain == DEFAULT_DOMAIN && !remainder.contains('/') {
256        remainder = format!("{OFFICIAL_REPO_PREFIX}{remainder}").into();
257    }
258
259    (domain, remainder)
260}
261
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that `Reference::parse_normalized_named` accepts every input, naming the
    /// offending input and the returned error on failure.
    fn assert_all_accepted(inputs: &[&str]) {
        for &input in inputs {
            let result = Reference::parse_normalized_named(input);
            assert!(
                result.is_ok(),
                "expected {input:?} to be accepted, got {result:?}"
            );
        }
    }

    /// Asserts that `Reference::parse_normalized_named` rejects every input, naming the
    /// offending input and the unexpectedly parsed reference on failure.
    fn assert_all_rejected(inputs: &[&str]) {
        for &input in inputs {
            let result = Reference::parse_normalized_named(input);
            assert!(
                result.is_err(),
                "expected {input:?} to be rejected, got {result:?}"
            );
        }
    }

    #[test]
    fn test_normalized_named_valid_repo_names() {
        let valid_repos = [
            "docker/docker",
            "library/debian",
            "debian",
            "docker.io/docker/docker",
            "docker.io/library/debian",
            "docker.io/debian",
            "index.docker.io/docker/docker",
            "index.docker.io/library/debian",
            "index.docker.io/debian",
            "127.0.0.1:5000/docker/docker",
            "127.0.0.1:5000/library/debian",
            "127.0.0.1:5000/debian",
            "192.168.0.1",
            "192.168.0.1:80",
            "192.168.0.1:8/debian",
            "192.168.0.2:25000/debian",
            "thisisthesongthatneverendsitgoesonandonandonthisisthesongthatnev",
            "[fc00::1]:5000/docker",
            "[fc00::1]:5000/docker/docker",
            "[fc00:1:2:3:4:5:6:7]:5000/library/debian",
            // This test case was moved from invalid to valid since it is valid input
            // when specified with a hostname: the hostname removes the ambiguity about
            // whether the value is an identifier or a repository name
            "docker.io/1a3f5e7d9c1b3a5f7e9d1c3b5a7f9e1d3c5b7a9f1e3d5d7c9b1a3f5e7d9c1b3a",
            "Docker/docker",
            "DOCKER/docker",
        ];

        assert_all_accepted(&valid_repos);
    }

    #[test]
    fn test_normalized_named_invalid_repo_names() {
        let invalid_repos = [
            "https://github.com/docker/docker",
            "docker/Docker",
            "-docker",
            "-docker/docker",
            "-docker.io/docker/docker",
            "docker///docker",
            "docker.io/docker/Docker",
            "docker.io/docker///docker",
            "[fc00::1]",
            "[fc00::1]:5000",
            "fc00::1:5000/debian",
            "[fe80::1%eth0]:5000/debian",
            "[2001:db8:3:4::192.0.2.33]:5000/debian",
            "1a3f5e7d9c1b3a5f7e9d1c3b5a7f9e1d3c5b7a9f1e3d5d7c9b1a3f5e7d9c1b3a",
        ];

        assert_all_rejected(&invalid_repos);
    }

    #[test]
    fn test_normalized_named_valid_remote_name() {
        let valid_remote_names = [
            // Sanity check.
            "docker/docker",
            // Allow 64-character non-hexadecimal names (hexadecimal names are forbidden).
            "thisisthesongthatneverendsitgoesonandonandonthisisthesongthatnev",
            // Allow embedded hyphens.
            "docker-rules/docker",
            // Allow multiple hyphens as well.
            "docker---rules/docker",
            // Username doc and image name docker being tested.
            "doc/docker",
            // single character names are now allowed.
            "d/docker",
            "jess/t",
            // Consecutive underscores.
            "dock__er/docker",
        ];

        assert_all_accepted(&valid_remote_names);
    }

    #[test]
    fn test_normalized_named_invalid_remote_name() {
        let invalid_remote_names = [
            // Disallow capital letters.
            "docker/Docker",
            // Only allow one slash.
            "docker///docker",
            // Disallow 64-character hexadecimal.
            "1a3f5e7d9c1b3a5f7e9d1c3b5a7f9e1d3c5b7a9f1e3d5d7c9b1a3f5e7d9c1b3a",
            // Disallow leading and trailing hyphens in namespace.
            "-docker/docker",
            "docker-/docker",
            "-docker-/docker",
            // Don't allow underscores everywhere (as opposed to hyphens).
            "____/____",
            "_docker/_docker",
            // Disallow consecutive periods.
            "dock..er/docker",
            "dock_.er/docker",
            "dock-.er/docker",
            // No repository.
            "docker/",
            // namespace too long
            "this_is_not_a_valid_namespace_because_its_lenth_is_greater_than_255_this_is_not_a_valid_namespace_because_its_lenth_is_greater_than_255_this_is_not_a_valid_namespace_because_its_lenth_is_greater_than_255_this_is_not_a_valid_namespace_because_its_lenth_is_greater_than_255/docker",
        ];

        assert_all_rejected(&invalid_remote_names);
    }

    #[test]
    fn test_parse_reference_with_tag_and_digest() {
        let short_ref = "busybox:latest@sha256:86e0e091d0da6bde2456dbb48306f3956bbeb2eae1b5b9a43045843f69fe4aaa";
        let normalized = Reference::parse_normalized_named(short_ref).unwrap();

        assert_eq!(
            normalized,
            Reference {
                repository: Repository {
                    domain: Some("docker.io".into()),
                    path: Some("library/busybox".into())
                },
                tag: Some("latest".into()),
                digest: Some(
                    "sha256:86e0e091d0da6bde2456dbb48306f3956bbeb2eae1b5b9a43045843f69fe4aaa"
                        .into()
                )
            }
        );

        assert_eq!(
            normalized.to_string(),
            "docker.io/library/busybox:latest@sha256:86e0e091d0da6bde2456dbb48306f3956bbeb2eae1b5b9a43045843f69fe4aaa"
        );
    }
}