Skip to main content

vm_rs/oci/
store.rs

1//! Content-addressable OCI image store.
2//!
3//! Layout:
4//!   <root>/images/
5//!     blobs/sha256/<hash>          -- raw layer tarballs + config JSON
6//!     manifests/<image>/<tag>.json -- cached manifests
7//!
8//! Same image used by multiple services = stored once (dedup by digest).
9
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12
13use serde::Deserialize;
14use sha2::{Digest, Sha256};
15
16use super::registry::OciError;
17
18// ---------------------------------------------------------------------------
19// Typed OCI JSON structures (replacing serde_json::Value)
20// ---------------------------------------------------------------------------
21
/// Raw OCI / Docker manifest as it appears on the wire.
///
/// JSON keys are camelCase (`mediaType`, `schemaVersion`). Every field is
/// optional so that deserialization itself succeeds on partial documents;
/// `ImageStore::parse_manifest` decides which fields are actually required.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct RawManifest {
    /// Media type of the manifest document, when the registry included one.
    media_type: Option<String>,
    /// Value of the `schemaVersion` key, when present.
    schema_version: Option<u64>,
    /// Descriptor pointing at the image config blob.
    config: Option<RawDescriptor>,
    /// Descriptors of the layer blobs, in the order listed by the manifest.
    layers: Option<Vec<RawDescriptor>>,
}
31
/// OCI content descriptor (used for config and layer references).
///
/// Only the digest is captured; any other keys of the JSON object
/// (e.g. `mediaType`, `size`) are not represented here.
#[derive(Deserialize)]
struct RawDescriptor {
    /// Content digest of the referenced blob, e.g. `sha256:<hex>`.
    digest: Option<String>,
}
37
/// Raw OCI image configuration JSON.
///
/// Only the top-level `config` object is captured; it is optional so that a
/// document without it still deserializes.
#[derive(Deserialize)]
struct RawImageRoot {
    /// The `config` section holding the runtime settings of the container.
    config: Option<RawContainerConfig>,
}
43
/// Container config section within the image config.
///
/// Field names are spelled exactly like the PascalCase JSON keys, which is why
/// the `non_snake_case` lint is silenced for this struct. Every field is
/// optional; `ImageStore::parse_config` substitutes defaults for missing ones.
#[derive(Deserialize)]
#[allow(non_snake_case)]
struct RawContainerConfig {
    /// `Entrypoint` argument vector.
    Entrypoint: Option<Vec<String>>,
    /// `Cmd` argument vector.
    Cmd: Option<Vec<String>>,
    /// Environment entries (e.g. `PATH=/usr/local/bin`).
    Env: Option<Vec<String>>,
    /// Working directory for the process.
    WorkingDir: Option<String>,
    /// User specification string.
    User: Option<String>,
    /// Map keyed by port specs such as `80/tcp`; only the keys are consumed
    /// by `ImageStore::parse_config`, the values are left untyped.
    ExposedPorts: Option<HashMap<String, serde_json::Value>>,
}
55
/// Local OCI blob store.
///
/// A handle is nothing more than the root directory path, so it is cheap to
/// clone and safe to print in diagnostics.
#[derive(Debug, Clone)]
pub struct ImageStore {
    /// Directory under which the `blobs/` and `manifests/` trees live.
    root: PathBuf,
}
60
/// Parsed OCI image manifest.
///
/// Implements `PartialEq`/`Eq` so that parsed manifests can be compared
/// directly (e.g. in tests or when checking whether a cached copy changed).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageManifest {
    /// Digest of the image config blob (e.g. `sha256:<hex>`).
    pub config_digest: String,
    /// Digests of the layer blobs, in the order listed by the manifest.
    pub layer_digests: Vec<String>,
    /// Media type declared by the manifest; empty when none was declared.
    pub media_type: String,
}
68
/// Parsed OCI image config (the parts needed to run the process).
///
/// `Default` yields an empty configuration (no entrypoint, command, env or
/// ports). `PartialEq`/`Eq` allow parsed configs to be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ImageConfig {
    /// Entrypoint argument vector; empty when the image defines none.
    pub entrypoint: Vec<String>,
    /// Command argument vector; empty when the image defines none.
    pub cmd: Vec<String>,
    /// Environment entries as listed in the image config.
    pub env: Vec<String>,
    /// Working directory; empty when the image defines none.
    pub working_dir: String,
    /// User specification; empty when the image defines none.
    pub user: String,
    /// Port numbers taken from the image's `ExposedPorts` keys.
    pub exposed_ports: Vec<u16>,
}
79
80impl ImageStore {
81    pub fn new(data_dir: &Path) -> Result<Self, OciError> {
82        let root = data_dir.join("images");
83        std::fs::create_dir_all(root.join("blobs/sha256"))?;
84        std::fs::create_dir_all(root.join("manifests"))?;
85        Ok(ImageStore { root })
86    }
87
88    /// Check if a blob exists locally.
89    pub fn has_blob(&self, digest: &str) -> bool {
90        self.blob_path(digest).exists()
91    }
92
93    /// Get the path to a blob by its sha256 digest.
94    pub fn blob_path(&self, digest: &str) -> PathBuf {
95        let hash = digest.strip_prefix("sha256:").unwrap_or(digest);
96        self.root.join("blobs/sha256").join(hash)
97    }
98
99    /// Write a blob and verify its digest.
100    pub fn put_blob(&self, digest: &str, data: &[u8]) -> Result<PathBuf, OciError> {
101        let expected_hash = digest.strip_prefix("sha256:").unwrap_or(digest);
102
103        let mut hasher = Sha256::new();
104        hasher.update(data);
105        let actual_hash = format!("{:x}", hasher.finalize());
106        if actual_hash != expected_hash {
107            return Err(OciError::Blob(format!(
108                "digest mismatch: expected sha256:{}, got sha256:{}",
109                expected_hash, actual_hash
110            )));
111        }
112
113        let path = self.blob_path(digest);
114        // Write to temp file then atomic rename to avoid corrupt files on crash
115        let tmp_path = path.with_extension("tmp");
116        std::fs::write(&tmp_path, data)?;
117        {
118            let f = std::fs::File::open(&tmp_path)?;
119            f.sync_all()?;
120        }
121        std::fs::rename(&tmp_path, &path)?;
122        Ok(path)
123    }
124
125    /// Read a blob's bytes.
126    pub fn get_blob(&self, digest: &str) -> Result<Vec<u8>, OciError> {
127        let path = self.blob_path(digest);
128        std::fs::read(&path)
129            .map_err(|e| OciError::Blob(format!("failed to read blob {}: {}", digest, e)))
130    }
131
132    /// Save a manifest for an image reference.
133    pub fn put_manifest(&self, image: &str, tag: &str, data: &[u8]) -> Result<(), OciError> {
134        let dir = self.root.join("manifests").join(sanitize_name(image));
135        std::fs::create_dir_all(&dir)?;
136        std::fs::write(dir.join(format!("{}.json", sanitize_name(tag))), data)?;
137        Ok(())
138    }
139
140    /// Load a cached manifest. Returns `Ok(None)` if not cached.
141    pub fn get_manifest(&self, image: &str, tag: &str) -> Result<Option<Vec<u8>>, OciError> {
142        let path = self
143            .root
144            .join("manifests")
145            .join(sanitize_name(image))
146            .join(format!("{}.json", sanitize_name(tag)));
147        match std::fs::read(&path) {
148            Ok(data) => Ok(Some(data)),
149            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
150            Err(e) => Err(OciError::Io(e)),
151        }
152    }
153
154    /// List all cached images as (repository, tag, layers, size_bytes) tuples.
155    pub fn list_images(&self) -> Result<Vec<(String, String, usize, u64)>, OciError> {
156        let manifests_dir = self.root.join("manifests");
157        let mut results = Vec::new();
158
159        let repos = std::fs::read_dir(&manifests_dir)?;
160
161        for repo_entry in repos {
162            let repo_entry = repo_entry?;
163            if !repo_entry.file_type()?.is_dir() {
164                continue;
165            }
166            let repo_name = unsanitize_name(&repo_entry.file_name().to_string_lossy());
167            let tags = std::fs::read_dir(repo_entry.path())?;
168
169            for tag_entry in tags {
170                let tag_entry = tag_entry?;
171                let filename = tag_entry.file_name().to_string_lossy().to_string();
172                if !filename.ends_with(".json") {
173                    continue;
174                }
175                let tag = unsanitize_name(&filename[..filename.len() - 5]);
176
177                let data = std::fs::read(tag_entry.path())?;
178                let manifest = Self::parse_manifest(&data)?;
179                let size: u64 = manifest
180                    .layer_digests
181                    .iter()
182                    .map(|d| {
183                        let path = self.blob_path(d);
184                        std::fs::metadata(&path).map(|m| m.len())
185                    })
186                    .collect::<Result<Vec<_>, _>>()?
187                    .into_iter()
188                    .sum();
189                let layers = manifest.layer_digests.len();
190
191                results.push((repo_name.clone(), tag, layers, size));
192            }
193        }
194
195        results.sort_by(|a, b| a.0.cmp(&b.0).then(a.1.cmp(&b.1)));
196        Ok(results)
197    }
198
199    /// Parse an OCI/Docker manifest JSON.
200    pub fn parse_manifest(data: &[u8]) -> Result<ImageManifest, OciError> {
201        let raw: RawManifest = serde_json::from_slice(data)
202            .map_err(|e| OciError::ManifestParse(format!("invalid JSON: {}", e)))?;
203
204        let media_type = raw
205            .media_type
206            .as_deref()
207            .or_else(|| raw.schema_version.map(|_| ""))
208            .unwrap_or("");
209
210        if media_type.contains("manifest.list") || media_type.contains("index") {
211            return Err(OciError::ManifestList);
212        }
213
214        let config_digest = raw
215            .config
216            .and_then(|c| c.digest)
217            .ok_or_else(|| OciError::ManifestParse("missing config digest".into()))?;
218
219        let layers = raw
220            .layers
221            .ok_or_else(|| OciError::ManifestParse("missing layers".into()))?;
222        let layer_digests: Vec<String> = layers
223            .into_iter()
224            .map(|l| {
225                l.digest
226                    .ok_or_else(|| OciError::ManifestParse("missing layer digest".into()))
227            })
228            .collect::<Result<_, _>>()?;
229
230        Ok(ImageManifest {
231            config_digest,
232            layer_digests,
233            media_type: media_type.to_string(),
234        })
235    }
236
237    /// Parse image config JSON to extract entrypoint, cmd, env, etc.
238    pub fn parse_config(data: &[u8]) -> Result<ImageConfig, OciError> {
239        let root: RawImageRoot = serde_json::from_slice(data)
240            .map_err(|e| OciError::ManifestParse(format!("invalid config JSON: {}", e)))?;
241
242        let cfg = root.config.unwrap_or(RawContainerConfig {
243            Entrypoint: None,
244            Cmd: None,
245            Env: None,
246            WorkingDir: None,
247            User: None,
248            ExposedPorts: None,
249        });
250
251        let exposed_ports = if let Some(obj) = cfg.ExposedPorts {
252            let mut ports = Vec::with_capacity(obj.len());
253            for key in obj.keys() {
254                let port = key
255                    .split('/')
256                    .next()
257                    .ok_or_else(|| {
258                        OciError::ManifestParse(format!("invalid exposed port '{}'", key))
259                    })?
260                    .parse::<u16>()
261                    .map_err(|e| {
262                        OciError::ManifestParse(format!("invalid exposed port '{}': {}", key, e))
263                    })?;
264                ports.push(port);
265            }
266            ports
267        } else {
268            Vec::new()
269        };
270
271        Ok(ImageConfig {
272            entrypoint: cfg.Entrypoint.unwrap_or_default(),
273            cmd: cfg.Cmd.unwrap_or_default(),
274            env: cfg.Env.unwrap_or_default(),
275            working_dir: cfg.WorkingDir.unwrap_or_default(),
276            user: cfg.User.unwrap_or_default(),
277            exposed_ports,
278        })
279    }
280
281    /// Extract all layers of an image into a target directory (for rootfs preparation).
282    pub fn extract_layers(&self, manifest: &ImageManifest, target: &Path) -> Result<(), OciError> {
283        std::fs::create_dir_all(target)?;
284
285        for (i, digest) in manifest.layer_digests.iter().enumerate() {
286            let blob_path = self.blob_path(digest);
287            if !blob_path.exists() {
288                return Err(OciError::Blob(format!("missing layer blob: {}", digest)));
289            }
290
291            tracing::info!(
292                "extracting layer {}/{}: {}",
293                i + 1,
294                manifest.layer_digests.len(),
295                &digest[..19.min(digest.len())]
296            );
297
298            let file = std::fs::File::open(&blob_path)?;
299            let reader: Box<dyn std::io::Read> = if is_gzip(&blob_path)? {
300                Box::new(flate2::read::GzDecoder::new(file))
301            } else {
302                Box::new(file)
303            };
304
305            let mut archive = tar::Archive::new(reader);
306            archive.set_preserve_permissions(true);
307            archive.set_preserve_ownerships(false);
308            archive.set_unpack_xattrs(false);
309            archive.set_overwrite(true);
310
311            for entry in archive
312                .entries()
313                .map_err(|e| OciError::Blob(format!("tar read error: {}", e)))?
314            {
315                let mut entry =
316                    entry.map_err(|e| OciError::Blob(format!("tar entry error: {}", e)))?;
317                let path = entry
318                    .path()
319                    .map_err(|e| OciError::Blob(format!("tar path error: {}", e)))?
320                    .to_path_buf();
321                let path_str = path.to_string_lossy();
322
323                // Handle whiteout files (.wh.*)
324                if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
325                    if let Some(deleted_name) = filename.strip_prefix(".wh.") {
326                        if deleted_name == ".wh..opq" {
327                            if let Some(parent) = path.parent() {
328                                let full_parent = target.join(parent);
329                                if full_parent.exists() {
330                                    let entries = std::fs::read_dir(&full_parent).map_err(|e| {
331                                        OciError::Blob(format!(
332                                            "opaque whiteout read_dir failed for {}: {}",
333                                            full_parent.display(),
334                                            e
335                                        ))
336                                    })?;
337                                    for child in entries.flatten() {
338                                        let child_path = child.path();
339                                        let remove_result = if child
340                                            .file_type()
341                                            .map(|ft| ft.is_dir())
342                                            .unwrap_or(false)
343                                        {
344                                            std::fs::remove_dir_all(&child_path)
345                                        } else {
346                                            std::fs::remove_file(&child_path)
347                                        };
348                                        if let Err(e) = remove_result {
349                                            tracing::warn!(path = %child_path.display(), "opaque whiteout cleanup failed: {}", e);
350                                        }
351                                    }
352                                }
353                            }
354                        } else if let Some(parent) = path.parent() {
355                            let deleted_path = target.join(parent).join(deleted_name);
356                            // Try file first, then directory — whiteout target could be either
357                            if std::fs::remove_file(&deleted_path).is_err() {
358                                if let Err(e) = std::fs::remove_dir_all(&deleted_path) {
359                                    tracing::debug!(path = %deleted_path.display(), "whiteout target not found (may not exist in lower layers): {}", e);
360                                }
361                            }
362                        }
363                        continue;
364                    }
365                }
366
367                // Skip absolute paths and path traversal.
368                // Check each component individually — "foo..bar" is safe, "../foo" is not.
369                let has_traversal = path.components().any(|c| {
370                    matches!(
371                        c,
372                        std::path::Component::ParentDir | std::path::Component::RootDir
373                    )
374                });
375                if has_traversal {
376                    tracing::warn!(path = %path_str, "skipping tar entry with path traversal");
377                    continue;
378                }
379
380                entry
381                    .unpack_in(target)
382                    .map_err(|e| OciError::Blob(format!("unpack error for {}: {}", path_str, e)))?;
383            }
384        }
385
386        Ok(())
387    }
388}
389
390fn is_gzip(path: &Path) -> Result<bool, OciError> {
391    let mut f = std::fs::File::open(path)?;
392    let mut magic = [0u8; 2];
393    use std::io::Read;
394    if f.read(&mut magic).map_err(OciError::Io)? == 2 {
395        Ok(magic[0] == 0x1f && magic[1] == 0x8b)
396    } else {
397        Ok(false)
398    }
399}
400
/// Turn an image or tag name into a single path component by spelling out
/// `/` as `_slash_` and `:` as `_colon_`; all other characters are kept.
fn sanitize_name(s: &str) -> String {
    let mut encoded = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '/' => encoded.push_str("_slash_"),
            ':' => encoded.push_str("_colon_"),
            other => encoded.push(other),
        }
    }
    encoded
}
404
/// Reverse `sanitize_name`: every `_slash_` becomes `/` first, and then every
/// `_colon_` in the result becomes `:`.
fn unsanitize_name(s: &str) -> String {
    let with_slashes = s.split("_slash_").collect::<Vec<_>>().join("/");
    with_slashes.split("_colon_").collect::<Vec<_>>().join(":")
}
408
/// Unit tests for manifest/config parsing, name sanitising and blob/manifest
/// storage on a temporary directory.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_docker_manifest() {
        let manifest_json = r#"{
            "schemaVersion": 2,
            "mediaType": "application/vnd.docker.distribution.manifest.v2+json",
            "config": {
                "mediaType": "application/vnd.docker.container.image.v1+json",
                "size": 7023,
                "digest": "sha256:abc123"
            },
            "layers": [
                {
                    "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip",
                    "size": 32654,
                    "digest": "sha256:layer1"
                },
                {
                    "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip",
                    "size": 16724,
                    "digest": "sha256:layer2"
                }
            ]
        }"#;

        let manifest =
            ImageStore::parse_manifest(manifest_json.as_bytes()).expect("manifest should parse");
        assert_eq!(manifest.config_digest, "sha256:abc123");
        assert_eq!(manifest.layer_digests.len(), 2);
    }

    #[test]
    fn parse_manifest_rejects_manifest_list() {
        let manifest_json = r#"{
            "schemaVersion": 2,
            "mediaType": "application/vnd.docker.distribution.manifest.list.v2+json",
            "manifests": []
        }"#;
        let result = ImageStore::parse_manifest(manifest_json.as_bytes());
        assert!(matches!(result, Err(OciError::ManifestList)));
    }

    #[test]
    fn parse_image_config() {
        let config_json = r#"{
            "config": {
                "Env": ["PATH=/usr/local/sbin:/usr/local/bin", "NGINX_VERSION=1.25"],
                "Cmd": ["nginx", "-g", "daemon off;"],
                "WorkingDir": "/",
                "ExposedPorts": { "80/tcp": {} }
            }
        }"#;

        let config = ImageStore::parse_config(config_json.as_bytes()).expect("config should parse");
        assert_eq!(config.cmd, vec!["nginx", "-g", "daemon off;"]);
        assert_eq!(config.env.len(), 2);
        assert_eq!(config.exposed_ports, vec![80]);
    }

    #[test]
    fn blob_path_strips_prefix() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let store = ImageStore::new(tmp.path()).expect("store");
        let path = store.blob_path("sha256:abc123def456");
        let suffix = std::path::Path::new("blobs")
            .join("sha256")
            .join("abc123def456");
        assert!(path.ends_with(&suffix));
    }

    #[test]
    fn put_blob_verifies_digest_and_roundtrips() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let store = ImageStore::new(tmp.path()).expect("store");
        let data = b"hello layer";

        let mut hasher = Sha256::new();
        hasher.update(data);
        let digest = format!("sha256:{:x}", hasher.finalize());

        assert!(!store.has_blob(&digest));
        let path = store.put_blob(&digest, data).expect("put_blob");
        assert_eq!(path, store.blob_path(&digest));
        assert!(store.has_blob(&digest));
        assert_eq!(store.get_blob(&digest).expect("get_blob"), data.to_vec());
    }

    #[test]
    fn put_blob_rejects_digest_mismatch() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let store = ImageStore::new(tmp.path()).expect("store");
        let bogus = format!("sha256:{}", "0".repeat(64));

        let err = store
            .put_blob(&bogus, b"hello")
            .expect_err("mismatched digest should fail");
        assert!(matches!(err, OciError::Blob(_)));
        assert!(!store.has_blob(&bogus));
    }

    #[test]
    fn manifest_cache_miss_then_hit() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let store = ImageStore::new(tmp.path()).expect("store");
        let image = "docker.io/library/nginx";

        let missing = store.get_manifest(image, "latest").expect("lookup");
        assert!(missing.is_none());

        store
            .put_manifest(image, "latest", b"{}")
            .expect("put_manifest");
        let cached = store.get_manifest(image, "latest").expect("lookup");
        assert_eq!(cached, Some(b"{}".to_vec()));
    }

    #[test]
    fn sanitize_roundtrip() {
        let name = "docker.io/library/nginx:latest";
        let sanitized = sanitize_name(name);
        assert!(!sanitized.contains('/'));
        assert!(!sanitized.contains(':'));
        let unsanitized = unsanitize_name(&sanitized);
        assert_eq!(unsanitized, name);
    }

    #[test]
    fn sanitize_simple_name() {
        let name = "alpine";
        let sanitized = sanitize_name(name);
        assert_eq!(sanitized, "alpine");
    }

    #[test]
    fn parse_manifest_missing_config() {
        let manifest_json = r#"{"schemaVersion": 2, "layers": []}"#;
        let result = ImageStore::parse_manifest(manifest_json.as_bytes());
        assert!(result.is_err());
    }

    #[test]
    fn parse_manifest_missing_layer_digest() {
        let manifest_json = r#"{
            "schemaVersion": 2,
            "config": { "digest": "sha256:cfg" },
            "layers": [{}, { "digest": "sha256:layer2" }]
        }"#;
        let err = ImageStore::parse_manifest(manifest_json.as_bytes())
            .expect_err("missing digest should fail");
        assert!(err.to_string().contains("missing layer digest"));
    }

    #[test]
    fn parse_config_minimal() {
        let config_json = r#"{"config": {}}"#;
        let config = ImageStore::parse_config(config_json.as_bytes()).expect("config should parse");
        assert!(config.cmd.is_empty());
        assert!(config.env.is_empty());
        assert!(config.exposed_ports.is_empty());
    }

    #[test]
    fn parse_config_with_entrypoint() {
        let config_json = r#"{
            "config": {
                "Entrypoint": ["/docker-entrypoint.sh"],
                "Cmd": ["nginx"]
            }
        }"#;
        let config = ImageStore::parse_config(config_json.as_bytes()).expect("config should parse");
        assert_eq!(config.entrypoint, vec!["/docker-entrypoint.sh"]);
        assert_eq!(config.cmd, vec!["nginx"]);
    }

    #[test]
    fn parse_config_exposed_ports_multiple() {
        let config_json = r#"{
            "config": {
                "ExposedPorts": { "80/tcp": {}, "443/tcp": {}, "8080/tcp": {} }
            }
        }"#;
        let config = ImageStore::parse_config(config_json.as_bytes()).expect("config should parse");
        // Sort locally: the test does not rely on any particular output order.
        let mut ports = config.exposed_ports.clone();
        ports.sort();
        assert_eq!(ports, vec![80, 443, 8080]);
    }
}