smolvm_protocol/image_ref.rs
1//! Image reference canonicalization.
2//!
3//! OCI image references have many valid spellings for the same image.
4//! This module provides [`normalize_image_ref`], which maps every spelling
5//! to a single canonical form so that cache keys, log messages, and
6//! protocol messages are consistent regardless of how the caller spelled
7//! the reference.
8
9/// Canonicalize an OCI image reference.
10///
11/// All equivalent spellings of the same image produce an identical string,
12/// which is safe to use as a cache key or protocol field.
13///
14/// # Normalization rules (applied in order)
15///
16/// 1. `index.docker.io` is rewritten to `docker.io` (legacy alias).
17/// 2. A missing registry defaults to `docker.io`.
18/// 3. Single-component names on `docker.io` receive the `library/` prefix
19/// (e.g. `alpine` → `docker.io/library/alpine`).
20/// 4. A missing tag defaults to `:latest`. When a digest (`@sha256:…`) is
21/// present it takes precedence and any tag is dropped.
22///
23/// # Examples
24///
25/// ```
26/// use smolvm_protocol::normalize_image_ref;
27///
28/// assert_eq!(normalize_image_ref("alpine"),
29/// "docker.io/library/alpine:latest");
30/// assert_eq!(normalize_image_ref("alpine:3.20"),
31/// "docker.io/library/alpine:3.20");
32/// assert_eq!(normalize_image_ref("docker.io/alpine:3.20"),
33/// "docker.io/library/alpine:3.20");
34/// assert_eq!(normalize_image_ref("docker.io/library/alpine:3.20"),
35/// "docker.io/library/alpine:3.20");
36/// assert_eq!(normalize_image_ref("ghcr.io/owner/repo:v1"),
37/// "ghcr.io/owner/repo:v1");
38/// ```
39pub fn normalize_image_ref(image: &str) -> String {
40 // 1. Resolve index.docker.io alias.
41 let owned;
42 let image = if let Some(rest) = image.strip_prefix("index.docker.io/") {
43 owned = format!("docker.io/{rest}");
44 owned.as_str()
45 } else {
46 image
47 };
48
49 // 2. Separate digest — everything after '@'. When a digest is present
50 // the tag is informational and is dropped (digest is authoritative).
51 let (ref_no_digest, digest) = match image.split_once('@') {
52 Some((left, right)) => (left, Some(right)),
53 None => (image, None),
54 };
55
56 // 3. Separate tag. The last ':' with no '/' after it is the tag
57 // separator. A colon that is part of a registry hostname (e.g.
58 // `localhost:5000/repo`) always has a '/' after it, so this rule
59 // correctly distinguishes the two cases.
60 let (ref_no_tag, tag) = match ref_no_digest.rfind(':') {
61 Some(pos) if !ref_no_digest[pos..].contains('/') => {
62 (&ref_no_digest[..pos], Some(&ref_no_digest[pos + 1..]))
63 }
64 _ => (ref_no_digest, None),
65 };
66
67 // 4. Detect registry: the first '/'-delimited component is a registry
68 // hostname when it contains '.' or ':' (port). Everything else is
69 // an implicit docker.io reference.
70 let (registry, path) = registry_and_path(ref_no_tag);
71
72 // 5. Single-component docker.io paths get the `library/` prefix.
73 let canonical_path = if registry == "docker.io" && !path.contains('/') {
74 format!("library/{path}")
75 } else {
76 path.to_string()
77 };
78
79 // 6. Suffix: digest wins over tag; absent tag defaults to `:latest`.
80 let suffix = match digest {
81 Some(d) => format!("@{d}"),
82 None => format!(":{}", tag.unwrap_or("latest")),
83 };
84
85 format!("{registry}/{canonical_path}{suffix}")
86}
87
/// Split a tag-free, digest-free image string into `(registry, path)`.
///
/// The text before the first `/` is treated as a registry host when it
/// contains a `.` (domain name), contains a `:` (port), or is exactly
/// `localhost`. The `localhost` case matters because a bare
/// `localhost/repo` would otherwise be mistaken for a `docker.io`
/// user namespace and canonicalized to the wrong registry.
///
/// Returns `("docker.io", whole_string)` when no explicit registry is found,
/// including when the input contains no `/` at all.
fn registry_and_path(image: &str) -> (&str, &str) {
    match image.split_once('/') {
        Some((host, rest))
            if host == "localhost" || host.contains(|c: char| c == '.' || c == ':') =>
        {
            (host, rest)
        }
        // No '/' at all, or the first component is an ordinary namespace.
        _ => ("docker.io", image),
    }
}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of tag-based references. Each row is
    /// `(input spelling, expected canonical form)`.
    #[test]
    fn test_normalize_image_ref() {
        let table: [(&str, &str); 10] = [
            // Bare name: registry, `library/` namespace and `:latest` are all implied.
            ("alpine", "docker.io/library/alpine:latest"),
            // Bare name with an explicit tag.
            ("alpine:3.20", "docker.io/library/alpine:3.20"),
            // Explicit docker.io but no namespace: `library/` is inserted.
            ("docker.io/alpine:3.20", "docker.io/library/alpine:3.20"),
            // Canonical input must come back unchanged.
            (
                "docker.io/library/alpine:3.20",
                "docker.io/library/alpine:3.20",
            ),
            // Legacy index.docker.io alias.
            (
                "index.docker.io/library/alpine",
                "docker.io/library/alpine:latest",
            ),
            // `library/` namespace given without a registry.
            ("library/alpine", "docker.io/library/alpine:latest"),
            // User namespace on docker.io: no `library/` is added.
            ("myuser/myimage:v2", "docker.io/myuser/myimage:v2"),
            // Third-party registry, without and with a tag.
            ("ghcr.io/owner/repo", "ghcr.io/owner/repo:latest"),
            ("ghcr.io/owner/repo:v1", "ghcr.io/owner/repo:v1"),
            // Registry port: its colon must not be read as the tag separator.
            ("localhost:5000/myimage:dev", "localhost:5000/myimage:dev"),
        ];

        for &(input, expected) in table.iter() {
            let actual = normalize_image_ref(input);
            assert_eq!(actual, expected, "normalize_image_ref({input:?})");
        }
    }

    /// Digest references keep the digest and lose any accompanying tag.
    #[test]
    fn test_normalize_digest_refs() {
        let digest = "sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
        let canonical = format!("docker.io/library/alpine@{digest}");

        // Digest only.
        let digest_only = format!("alpine@{digest}");
        assert_eq!(normalize_image_ref(&digest_only), canonical);

        // Tag plus digest: the digest is authoritative, so the tag is dropped.
        let tag_and_digest = format!("alpine:3.20@{digest}");
        assert_eq!(normalize_image_ref(&tag_and_digest), canonical);
    }
}