Skip to main content

composefs_oci/
oci_image.rs

1//! OCI image and artifact storage for composefs.
2//!
3//! This module provides native OCI storage in composefs repositories. The key insight
4//! is that OCI is a simple, extensible format that can represent any content - not just
5//! container images. By standardizing on OCI, we get:
6//!
7//! - A well-defined manifest format with content-addressed blobs
8//! - Built-in support for signatures (cosign, notation)
9//! - Existing tooling (skopeo, crane, oras)
10//! - A clear GC model: manifests are roots, everything else is garbage-collectable
11//!
12//! # Storage Model
13//!
14//! ```text
15//! streams/
16//!   oci-manifest-sha256:abc...  -> objects/XX/YYY  (manifest splitstream)
17//!   oci-config-sha256:def...    -> objects/XX/YYY  (config splitstream)  
18//!   oci-layer-sha256:ghi...     -> objects/XX/YYY  (layer splitstream)
19//!   refs/
20//!     oci/
21//!       myimage:latest          -> ../../oci-manifest-sha256:abc...  (GC root!)
22//!       myimage:v1.0            -> ../../oci-manifest-sha256:xyz...
23//! ```
24//!
25//! Named references under `refs/oci/` act as GC roots. Manifests without references
26//! will be garbage collected along with their unreferenced configs and layers.
27//!
28//! # Container Images vs Artifacts
29//!
30//! Container images have:
31//! - Config with `application/vnd.oci.image.config.v1+json` mediaType
32//! - Layers that are tar archives (gzip, zstd, or uncompressed)
33//!
34//! Artifacts can have:
35//! - Any config mediaType (or empty config)
36//! - Any blob types as "layers"
37//!
38//! This module handles both transparently. Use `is_container_image()` to check.
39
40use std::collections::{HashMap, HashSet};
41use std::sync::Arc;
42
43use anyhow::{Context, Result, ensure};
44use containers_image_proxy::oci_spec::image::{
45    Descriptor, Digest as OciDigest, ImageConfiguration, ImageManifest, MediaType,
46};
47use rustix::fs::{AtFlags, Dir, Mode, OFlags, openat, readlinkat, unlinkat};
48use rustix::io::Errno;
49use serde::Serialize;
50
51use composefs::{
52    erofs::format::{FormatEpoch, FormatVersion},
53    fsverity::FsVerityHashValue,
54    repository::Repository,
55};
56
57use crate::ContentAndVerity;
58use crate::layer::is_tar_media_type;
59use crate::skopeo::{OCI_BLOB_CONTENT_TYPE, OCI_CONFIG_CONTENT_TYPE, OCI_MANIFEST_CONTENT_TYPE};
60
/// Error marker: an OCI reference (tag) does not exist.
///
/// This is a typed error so that callers holding an `anyhow::Error` can
/// `downcast_ref::<OciRefNotFound>()` to tell "no such tag" apart from other
/// failures. [`resolve_ref`] returns it when the ref symlink is missing.
#[derive(Debug, thiserror::Error)]
#[error("OCI reference not found: {name}")]
pub struct OciRefNotFound {
    /// The reference name that was not found, exactly as the caller passed it
    /// (i.e. before filesystem-safe encoding).
    pub name: String,
}
68
/// Error marker: an OCI manifest/image does not exist in the repository.
///
/// This is a typed error so that callers holding an `anyhow::Error` can
/// `downcast_ref::<OciImageNotFound>()` to detect a missing image.
/// [`OciImage::open`] returns it when the repository reports no stream for
/// the manifest.
#[derive(Debug, thiserror::Error)]
#[error("OCI image not found: {digest}")]
pub struct OciImageNotFound {
    /// The manifest digest that was not found, in its string form.
    pub digest: String,
}
76
/// Data and named refs from a splitstream with external object storage.
///
/// The first element is the raw content of the single external object; the
/// second is the splitstream's named-ref table (ref name → object ID).
type ExternalData<ObjectID> = (Vec<u8>, HashMap<Box<str>, ObjectID>);
79
80/// Open a splitstream that stores its payload as a single external object.
81///
82/// Manifests, configs, and blobs are stored as external objects (not inline)
83/// so that fsverity can be independently enabled on the raw content. This
84/// function opens the splitstream, verifies it contains exactly one external
85/// object reference, and returns that object's data along with the stream's
86/// named refs (used for GC reachability to configs and layers).
87pub(crate) fn read_external_splitstream<ObjectID: FsVerityHashValue>(
88    repo: &Repository<ObjectID>,
89    content_id: &str,
90    verity: Option<&ObjectID>,
91    expected_content_type: Option<u64>,
92) -> Result<ExternalData<ObjectID>> {
93    let mut stream = repo.open_stream(content_id, verity, expected_content_type)?;
94
95    let mut object_refs = Vec::new();
96    stream.get_object_refs(|id| object_refs.push(id.clone()))?;
97    ensure!(
98        object_refs.len() == 1,
99        "Expected exactly 1 external object in splitstream, got {}",
100        object_refs.len()
101    );
102
103    let data = repo.read_object(&object_refs[0])?;
104    let named_refs = stream.into_named_refs();
105    Ok((data, named_refs))
106}
107
/// Prefix for OCI image references in the repository.
///
/// Encoded tag names are appended to this prefix to form the ref path that
/// lives under `streams/refs/`.
pub const OCI_REF_PREFIX: &str = "oci/";
110
/// An OCI image or artifact stored in a composefs repository.
///
/// This type provides access to the complete OCI structure including
/// manifest, config, and layer/blob references. All metadata is stored
/// locally, eliminating network access for queries.
///
/// Instances are created with [`OciImage::open`] or [`OciImage::open_ref`];
/// all fields are private and exposed through accessor methods.
#[derive(Debug)]
pub struct OciImage<ObjectID: FsVerityHashValue> {
    /// The manifest digest (sha256 content hash)
    manifest_digest: OciDigest,
    /// The parsed OCI manifest
    manifest: ImageManifest,
    /// The config digest (sha256 content hash), taken from the manifest's
    /// config descriptor
    config_digest: OciDigest,
    /// The fs-verity ID of the config splitstream, looked up from the
    /// manifest splitstream's `config:<digest>` named ref
    config_verity: ObjectID,
    /// The parsed OCI config; `None` when the manifest's config media type is
    /// not the OCI image config type (i.e. for artifacts)
    config: Option<ImageConfiguration>,
    /// Map from layer key to its fs-verity object ID.
    ///
    /// For container images this is the config splitstream's named refs
    /// (keyed by diff_id) with the EROFS image refs removed; for artifacts it
    /// is the subset of the manifest's named refs whose key is a layer digest
    /// listed in the manifest.
    layer_refs: HashMap<Box<str>, ObjectID>,
    /// The V2 EROFS image ObjectID linked to this config, if any
    image_ref: Option<ObjectID>,
    /// The V1 EROFS image ObjectID linked to this config, if any
    image_ref_v1: Option<ObjectID>,
    /// The V2 boot EROFS image ObjectID linked to this config, if any
    boot_image_ref: Option<ObjectID>,
    /// The V1 boot EROFS image ObjectID linked to this config, if any
    boot_image_ref_v1: Option<ObjectID>,
    /// The fs-verity ID of the manifest splitstream
    manifest_verity: ObjectID,
}
141
142impl<ObjectID: FsVerityHashValue> OciImage<ObjectID> {
143    /// Opens an OCI image by its manifest digest.
144    ///
145    /// If `verity` is provided, it's used directly for fast lookup.
146    /// Otherwise, the content is verified against the digest.
147    pub fn open(
148        repo: &Repository<ObjectID>,
149        manifest_digest: &OciDigest,
150        verity: Option<&ObjectID>,
151    ) -> Result<Self> {
152        let manifest_id = manifest_identifier(manifest_digest);
153        let (data, named_refs) =
154            read_external_splitstream(repo, &manifest_id, verity, Some(OCI_MANIFEST_CONTENT_TYPE))?;
155
156        // Verify content hash when no verity was provided
157        if verity.is_none() {
158            let computed = hash_sha256(&data);
159            ensure!(
160                *manifest_digest == computed,
161                "Manifest integrity failed: expected {manifest_digest}, got {computed}"
162            );
163        }
164
165        let manifest = ImageManifest::from_reader(&data[..])?;
166
167        let config_digest = manifest.config().digest().clone();
168        let config_key = format!("config:{config_digest}");
169        let config_verity = named_refs
170            .get(config_key.as_str())
171            .context("Manifest missing config reference")?
172            .clone();
173
174        let config_id = crate::config_identifier(&config_digest);
175        let (config_data, config_named_refs) = read_external_splitstream(
176            repo,
177            &config_id,
178            Some(&config_verity),
179            Some(OCI_CONFIG_CONTENT_TYPE),
180        )?;
181
182        // Try to parse as ImageConfiguration, but don't fail for artifacts
183        let (config, mut layer_refs) = match manifest.config().media_type() {
184            MediaType::ImageConfig => {
185                let config = ImageConfiguration::from_reader(&config_data[..])?;
186                (Some(config), config_named_refs)
187            }
188            _ => {
189                // Artifact - layer refs are in the manifest's named refs.
190                // Filter to only include refs matching known layer digests
191                // from the manifest, rather than removing the config key
192                // and hoping nothing else leaks through.
193                let layer_digests: HashSet<&str> = manifest
194                    .layers()
195                    .iter()
196                    .map(|d| d.digest().as_ref())
197                    .collect();
198                let refs = named_refs
199                    .into_iter()
200                    .filter(|(k, _)| layer_digests.contains(k.as_ref()))
201                    .collect();
202                (None, refs)
203            }
204        };
205
206        // Strip the EROFS image refs from layer_refs (they're not layers)
207        let image_ref = layer_refs.remove(crate::IMAGE_REF_KEY);
208        let image_ref_v1 = layer_refs.remove(crate::IMAGE_REF_KEY_V1);
209        let boot_image_ref = layer_refs.remove(crate::BOOT_IMAGE_REF_KEY);
210        let boot_image_ref_v1 = layer_refs.remove(crate::BOOT_IMAGE_REF_KEY_V1);
211
212        let manifest_verity = if let Some(v) = verity {
213            v.clone()
214        } else {
215            match repo.has_stream(&manifest_id)? {
216                Some(v) => v,
217                None => {
218                    return Err(anyhow::Error::new(OciImageNotFound {
219                        digest: manifest_digest.to_string(),
220                    }));
221                }
222            }
223        };
224
225        Ok(Self {
226            manifest_digest: manifest_digest.clone(),
227            manifest,
228            config_digest,
229            config_verity,
230            config,
231            layer_refs,
232            image_ref,
233            image_ref_v1,
234            boot_image_ref,
235            boot_image_ref_v1,
236            manifest_verity,
237        })
238    }
239
240    /// Opens an OCI image by its tag/reference name.
241    pub fn open_ref(repo: &Repository<ObjectID>, name: &str) -> Result<Self> {
242        let (manifest_digest, verity) = resolve_ref(repo, name)?;
243        Self::open(repo, &manifest_digest, Some(&verity))
244    }
245
246    /// Returns true if this is a container image (vs an artifact).
247    pub fn is_container_image(&self) -> bool {
248        matches!(self.manifest.config().media_type(), MediaType::ImageConfig)
249    }
250
251    /// Returns the manifest digest.
252    pub fn manifest_digest(&self) -> &OciDigest {
253        &self.manifest_digest
254    }
255
256    /// Returns the manifest fs-verity hash.
257    pub fn manifest_verity(&self) -> &ObjectID {
258        &self.manifest_verity
259    }
260
261    /// Returns the OCI manifest.
262    pub fn manifest(&self) -> &ImageManifest {
263        &self.manifest
264    }
265
266    /// Returns the config digest.
267    pub fn config_digest(&self) -> &OciDigest {
268        &self.config_digest
269    }
270
271    /// Returns the config fs-verity hash.
272    pub fn config_verity(&self) -> &ObjectID {
273        &self.config_verity
274    }
275
276    /// Returns the OCI config, if this is a container image.
277    pub fn config(&self) -> Option<&ImageConfiguration> {
278        self.config.as_ref()
279    }
280
281    /// Returns the layer refs map (diff_id → fs-verity ObjectID).
282    pub fn layer_refs(&self) -> &HashMap<Box<str>, ObjectID> {
283        &self.layer_refs
284    }
285
286    /// Returns the EROFS image ObjectID for `version`, if present.
287    ///
288    /// Maps `version` to its on-disk storage slot via [`FormatVersion::epoch`]:
289    /// epoch1 (V0/V1) resolves the V1 ref; epoch2 (V2) resolves the V2 ref.
290    /// No fallback — returns `None` if that specific format was not generated.
291    pub fn image_ref(&self, version: FormatVersion) -> Option<&ObjectID> {
292        match version.epoch() {
293            FormatEpoch::Epoch1 => self.image_ref_v1.as_ref(),
294            FormatEpoch::Epoch2 => self.image_ref.as_ref(),
295        }
296    }
297
298    /// Returns the V2 EROFS image ObjectID linked to this config, if any.
299    pub fn image_ref_v2(&self) -> Option<&ObjectID> {
300        self.image_ref.as_ref()
301    }
302
303    /// Returns the V1 EROFS image ObjectID linked to this config, if any.
304    pub fn image_ref_v1(&self) -> Option<&ObjectID> {
305        self.image_ref_v1.as_ref()
306    }
307
308    /// Returns the boot EROFS image ObjectID for `version`, if present.
309    ///
310    /// Maps `version` to its on-disk storage slot via [`FormatVersion::epoch`].
311    /// No fallback — returns `None` if that specific format was not generated.
312    pub fn boot_image_ref(&self, version: FormatVersion) -> Option<&ObjectID> {
313        match version.epoch() {
314            FormatEpoch::Epoch1 => self.boot_image_ref_v1.as_ref(),
315            FormatEpoch::Epoch2 => self.boot_image_ref.as_ref(),
316        }
317    }
318
319    /// Returns the V2 boot EROFS image ObjectID linked to this config, if any.
320    pub fn boot_image_ref_v2(&self) -> Option<&ObjectID> {
321        self.boot_image_ref.as_ref()
322    }
323
324    /// Returns the V1 boot EROFS image ObjectID linked to this config, if any.
325    pub fn boot_image_ref_v1(&self) -> Option<&ObjectID> {
326        self.boot_image_ref_v1.as_ref()
327    }
328
329    /// Returns the image architecture (empty string for artifacts).
330    pub fn architecture(&self) -> String {
331        self.config
332            .as_ref()
333            .map(|c| c.architecture().to_string())
334            .unwrap_or_default()
335    }
336
337    /// Returns the image OS (empty string for artifacts).
338    pub fn os(&self) -> String {
339        self.config
340            .as_ref()
341            .map(|c| c.os().to_string())
342            .unwrap_or_default()
343    }
344
345    /// Returns the creation timestamp.
346    pub fn created(&self) -> Option<&str> {
347        self.config.as_ref().and_then(|c| c.created().as_deref())
348    }
349
350    /// Opens an artifact layer's backing object by index, returning a
351    /// read-only file descriptor to the raw blob data.
352    ///
353    /// This only works for non-tar layers (OCI artifacts). Returns an
354    /// error for tar layers — use the splitstream API for those.
355    pub fn open_layer_fd(
356        &self,
357        repo: &Repository<ObjectID>,
358        index: usize,
359    ) -> Result<rustix::fd::OwnedFd> {
360        let descriptor = self
361            .manifest
362            .layers()
363            .get(index)
364            .with_context(|| format!("Layer index {index} out of range"))?;
365
366        ensure!(
367            !is_tar_media_type(descriptor.media_type()),
368            "open_layer_fd does not support tar layers (media type: {}); \
369             use the splitstream API instead",
370            descriptor.media_type()
371        );
372
373        let diff_id = descriptor.digest();
374        let layer_verity = self
375            .layer_verity(diff_id.as_ref())
376            .with_context(|| format!("No verity for layer {diff_id}"))?;
377
378        let content_id = crate::layer_identifier(diff_id);
379        let mut stream = repo.open_stream(&content_id, Some(layer_verity), None)?;
380
381        // Artifact layers are stored as a single object; the splitstream
382        // exists only for GC tracking.
383        let mut object_refs = vec![];
384        stream.get_object_refs(|id| object_refs.push(id.clone()))?;
385        ensure!(
386            object_refs.len() == 1,
387            "Expected exactly 1 external ref for artifact layer, got {}",
388            object_refs.len()
389        );
390        repo.open_object(&object_refs[0])
391    }
392
393    /// Returns the layer diff_ids (for container images).
394    pub fn layer_diff_ids(&self) -> Vec<&str> {
395        self.config
396            .as_ref()
397            .map(|c| c.rootfs().diff_ids().iter().map(|s| s.as_str()).collect())
398            .unwrap_or_default()
399    }
400
401    /// Returns the fs-verity ID for a layer.
402    pub fn layer_verity(&self, diff_id: &str) -> Option<&ObjectID> {
403        self.layer_refs.get(diff_id)
404    }
405
406    /// Returns layer descriptors from the manifest.
407    pub fn layer_descriptors(&self) -> &[Descriptor] {
408        self.manifest.layers()
409    }
410
411    /// Returns a label from the config.
412    pub fn label(&self, key: &str) -> Option<&str> {
413        self.config.as_ref().and_then(|c| {
414            c.config()
415                .as_ref()
416                .and_then(|cfg| cfg.labels().as_ref())
417                .and_then(|labels| labels.get(key).map(|s| s.as_str()))
418        })
419    }
420
421    /// Returns all labels from the config.
422    pub fn labels(&self) -> Option<&HashMap<String, String>> {
423        self.config
424            .as_ref()
425            .and_then(|c| c.config().as_ref())
426            .and_then(|cfg| cfg.labels().as_ref())
427    }
428
429    /// Reads the raw manifest JSON bytes from the repository.
430    ///
431    /// This retrieves the original manifest JSON as stored, which may differ
432    /// slightly from re-serializing the parsed manifest (e.g., whitespace).
433    pub fn read_manifest_json(&self, repo: &Repository<ObjectID>) -> Result<Vec<u8>> {
434        let manifest_id = manifest_identifier(&self.manifest_digest);
435        let (data, _) = read_external_splitstream(
436            repo,
437            &manifest_id,
438            Some(&self.manifest_verity),
439            Some(OCI_MANIFEST_CONTENT_TYPE),
440        )?;
441        Ok(data)
442    }
443
444    /// Reads the raw config JSON bytes from the repository.
445    ///
446    /// This retrieves the original config JSON as stored, which may differ
447    /// slightly from re-serializing the parsed config (e.g., whitespace).
448    pub fn read_config_json(&self, repo: &Repository<ObjectID>) -> Result<Vec<u8>> {
449        let config_id = crate::config_identifier(&self.config_digest);
450
451        let (data, _) = read_external_splitstream(
452            repo,
453            &config_id,
454            Some(&self.config_verity),
455            Some(OCI_CONFIG_CONTENT_TYPE),
456        )?;
457        Ok(data)
458    }
459
460    /// Returns the full inspect output as a JSON value.
461    ///
462    /// This includes the manifest, config, and referrers in a single JSON object.
463    /// The manifest and config are included as their original JSON structure.
464    pub fn inspect_json(&self, repo: &Repository<ObjectID>) -> Result<serde_json::Value> {
465        let manifest_json = self.read_manifest_json(repo)?;
466        let config_json = self.read_config_json(repo)?;
467        let referrers = list_referrers(repo, &self.manifest_digest)?;
468
469        let manifest_value: serde_json::Value = serde_json::from_slice(&manifest_json)?;
470        let config_value: serde_json::Value = serde_json::from_slice(&config_json)?;
471
472        let referrers_value: Vec<serde_json::Value> = referrers
473            .iter()
474            .map(|(digest, _verity)| serde_json::json!({ "digest": digest }))
475            .collect();
476
477        let mut result = serde_json::json!({
478            "manifest": manifest_value,
479            "config": config_value,
480            "referrers": referrers_value,
481        });
482
483        if let Some(erofs_id) = self.image_ref(repo.erofs_version()) {
484            result["composefs_erofs"] = serde_json::json!(erofs_id.to_hex());
485        }
486
487        if let Some(boot_id) = self.boot_image_ref(repo.erofs_version()) {
488            result["composefs_boot_erofs"] = serde_json::json!(boot_id.to_hex());
489        }
490
491        Ok(result)
492    }
493}
494
495// =============================================================================
496// Reference Management (GC Roots)
497// =============================================================================
498
499/// Validate that a ref name doesn't start with `@`, which is reserved as
500/// the digest prefix (e.g. `@sha256:abc...`).
501fn validate_ref_name(name: &str) -> Result<()> {
502    ensure!(
503        !name.starts_with('@'),
504        "Invalid ref name {name:?}: leading '@' is reserved for digest references"
505    );
506    Ok(())
507}
508
509/// Tags an image with a name, making it a GC root.
510///
511/// The name should be in the format `image:tag` or just `image` (implies `:latest`).
512/// Names must not contain `@`, which is reserved for digest references.
513pub fn tag_image<ObjectID: FsVerityHashValue>(
514    repo: &Repository<ObjectID>,
515    manifest_digest: &OciDigest,
516    name: &str,
517) -> Result<()> {
518    validate_ref_name(name)?;
519    let manifest_id = manifest_identifier(manifest_digest);
520    let ref_name = oci_ref_path(name);
521    repo.name_stream(&manifest_id, &ref_name)
522}
523
524/// Removes a tag from an image.
525///
526/// The image data is not deleted; it becomes eligible for garbage collection
527/// if no other references point to it.
528pub fn untag_image<ObjectID: FsVerityHashValue>(
529    repo: &Repository<ObjectID>,
530    name: &str,
531) -> Result<()> {
532    let ref_path = format!("streams/refs/{}", oci_ref_path(name));
533    unlinkat(repo.repo_fd(), &ref_path, AtFlags::empty())
534        .with_context(|| format!("Failed to remove tag {name}"))?;
535    Ok(())
536}
537
538/// Resolves a reference name to (manifest_digest, verity).
539pub fn resolve_ref<ObjectID: FsVerityHashValue>(
540    repo: &Repository<ObjectID>,
541    name: &str,
542) -> Result<(OciDigest, ObjectID)> {
543    let ref_path = format!("streams/refs/{}", oci_ref_path(name));
544
545    // Read the symlink to get the manifest path
546    let target = match readlinkat(repo.repo_fd(), &ref_path, vec![]) {
547        Ok(t) => t,
548        Err(Errno::NOENT) => {
549            return Err(anyhow::Error::new(OciRefNotFound {
550                name: name.to_string(),
551            }));
552        }
553        Err(e) => {
554            return Err(e).with_context(|| format!("Reference {name} not found"));
555        }
556    };
557
558    let target_str = target
559        .to_str()
560        .context("Invalid UTF-8 in reference target")?;
561
562    // Extract manifest digest from path like "../../oci-manifest-sha256:abc"
563    let manifest_part = target_str
564        .rsplit('/')
565        .next()
566        .context("Invalid reference target")?;
567
568    let digest_str = manifest_part
569        .strip_prefix("oci-manifest-")
570        .with_context(|| format!("Invalid manifest reference: {manifest_part}"))?;
571
572    let digest: OciDigest = digest_str
573        .parse()
574        .with_context(|| format!("Invalid OCI digest in reference: {digest_str}"))?;
575
576    // Get the verity by looking up the manifest
577    let verity = repo
578        .has_stream(&manifest_identifier(&digest))?
579        .with_context(|| format!("Manifest {digest} not found"))?;
580
581    Ok((digest, verity))
582}
583
584/// Lists all tagged OCI images.
585///
586/// Returns (name, manifest_digest) pairs for each tag.
587pub fn list_refs<ObjectID: FsVerityHashValue>(
588    repo: &Repository<ObjectID>,
589) -> Result<Vec<(String, OciDigest)>> {
590    let mut refs = Vec::new();
591
592    // Use the repository's ref listing method
593    for (name, target) in repo.list_stream_refs("oci")? {
594        // Extract manifest digest from target path
595        let manifest_part = target.rsplit('/').next().unwrap_or(&target);
596        if let Some(digest_str) = manifest_part.strip_prefix("oci-manifest-")
597            && let Ok(digest) = digest_str.parse()
598        {
599            // Decode the tag name from filesystem-safe encoding
600            refs.push((decode_tag(&name), digest));
601        }
602    }
603
604    Ok(refs)
605}
606
/// Summary information about a stored OCI image.
///
/// Serialized with camelCase field names (e.g. `manifestDigest`).
///
/// FIXME change this to just have a struct of manifest+config JSON
/// plus a few helper methods. We shouldn't be re-parsing created timestamp here
/// callers should directly access that etc
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ImageInfo {
    /// The tag/name of the image
    pub name: String,
    /// The manifest digest
    pub manifest_digest: OciDigest,
    /// Whether this is a container image (vs artifact)
    pub is_container: bool,
    /// Architecture (empty for artifacts)
    pub architecture: String,
    /// OS (empty for artifacts)
    pub os: String,
    /// Creation timestamp from the config, if any
    pub created: Option<String>,
    /// Number of layers/blobs listed in the manifest
    pub layer_count: usize,
    /// Number of OCI referrers (signatures, attestations, etc.)
    pub referrer_count: usize,
}
631
632/// Lists all tagged images with their metadata.
633pub fn list_images<ObjectID: FsVerityHashValue>(
634    repo: &Repository<ObjectID>,
635) -> Result<Vec<ImageInfo>> {
636    let mut images = Vec::new();
637
638    for (name, digest) in list_refs(repo)? {
639        match OciImage::open(repo, &digest, None) {
640            Ok(img) => {
641                let referrer_count = list_referrers(repo, &digest).map(|r| r.len()).unwrap_or(0);
642                images.push(ImageInfo {
643                    name,
644                    manifest_digest: digest,
645                    is_container: img.is_container_image(),
646                    architecture: img.architecture(),
647                    os: img.os(),
648                    created: img.created().map(String::from),
649                    layer_count: img.layer_descriptors().len(),
650                    referrer_count,
651                });
652            }
653            Err(e) => {
654                tracing::warn!("skipping image {name}: {e:#}");
655                continue;
656            }
657        }
658    }
659
660    Ok(images)
661}
662
663// =============================================================================
664// Manifest Storage
665// =============================================================================
666
667/// Writes a manifest to the repository.
668///
669/// The manifest JSON is stored as an external object (not inline) so that
670/// fsverity can be independently enabled on it. This is important for signing:
671/// a signature can reference the fsverity digest of the manifest content directly.
672///
673/// The manifest becomes a GC root only if a `reference` name is provided.
674/// The reference name must not contain `@`, which is reserved for digest
675/// references.
676pub fn write_manifest<ObjectID: FsVerityHashValue, S: AsRef<str>>(
677    repo: &Arc<Repository<ObjectID>>,
678    manifest: &ImageManifest,
679    manifest_digest: &OciDigest,
680    config_verity: &ObjectID,
681    layer_verities: &[(S, ObjectID)],
682    reference: Option<&str>,
683) -> Result<ContentAndVerity<ObjectID>> {
684    if let Some(name) = reference {
685        validate_ref_name(name)?;
686    }
687
688    let content_id = manifest_identifier(manifest_digest);
689
690    if let Some(verity) = repo.has_stream(&content_id)? {
691        // Already exists - just add the reference if requested
692        if let Some(name) = reference {
693            tag_image(repo, manifest_digest, name)?;
694        }
695        return Ok((manifest_digest.clone(), verity));
696    }
697
698    let json = manifest.to_string()?;
699    let json_bytes = json.as_bytes();
700
701    let computed = hash_sha256(json_bytes);
702    ensure!(
703        *manifest_digest == computed,
704        "Manifest digest mismatch: expected {manifest_digest}, got {computed}"
705    );
706
707    let mut stream = repo.create_stream(OCI_MANIFEST_CONTENT_TYPE)?;
708
709    let config_key = format!("config:{}", manifest.config().digest());
710    stream.add_named_stream_ref(&config_key, config_verity);
711
712    for (diff_id, verity) in layer_verities {
713        stream.add_named_stream_ref(diff_id.as_ref(), verity);
714    }
715
716    stream.write_external(json_bytes)?;
717
718    let oci_ref = reference.map(oci_ref_path);
719    let id = repo.write_stream(stream, &content_id, oci_ref.as_deref())?;
720
721    Ok((computed, id))
722}
723
724/// Rewrites a manifest splitstream with updated named refs.
725///
726/// Unlike [`write_manifest`], this always writes the splitstream even if the
727/// content identifier already exists. This is needed when the manifest JSON
728/// hasn't changed but the config splitstream's verity has (e.g., because an
729/// EROFS image ref was added to the config).
730///
731/// If `reference` is provided, the manifest is also tagged with that name.
732pub(crate) fn rewrite_manifest<ObjectID: FsVerityHashValue, S: AsRef<str>>(
733    repo: &Arc<Repository<ObjectID>>,
734    manifest_json: &[u8],
735    manifest_digest: &OciDigest,
736    config_verity: &ObjectID,
737    layer_verities: &[(S, ObjectID)],
738    reference: Option<&str>,
739) -> Result<(OciDigest, ObjectID)> {
740    let content_id = manifest_identifier(manifest_digest);
741
742    let config_digest = {
743        let manifest = ImageManifest::from_reader(manifest_json)?;
744        manifest.config().digest().to_string()
745    };
746
747    let mut stream = repo.create_stream(OCI_MANIFEST_CONTENT_TYPE)?;
748
749    let config_key = format!("config:{config_digest}");
750    stream.add_named_stream_ref(&config_key, config_verity);
751
752    for (diff_id, verity) in layer_verities {
753        stream.add_named_stream_ref(diff_id.as_ref(), verity);
754    }
755
756    stream.write_external(manifest_json)?;
757
758    let oci_ref = reference.map(oci_ref_path);
759    let id = repo.write_stream(stream, &content_id, oci_ref.as_deref())?;
760
761    Ok((manifest_digest.clone(), id))
762}
763
/// Checks if a manifest exists.
///
/// Returns whatever `Repository::has_stream` reports for the manifest's
/// content identifier: `Some(id)` when the stream is present, `None`
/// otherwise.
pub fn has_manifest<ObjectID: FsVerityHashValue>(
    repo: &Repository<ObjectID>,
    manifest_digest: &OciDigest,
) -> Result<Option<ObjectID>> {
    repo.has_stream(&manifest_identifier(manifest_digest))
}
771
/// Returns the content identifier for a manifest.
///
/// The identifier is the digest's display form prefixed with
/// `oci-manifest-`.
pub fn manifest_identifier(digest: &OciDigest) -> String {
    format!("oci-manifest-{digest}")
}
776
/// Returns the reference path for an OCI name.
///
/// The name is made filesystem-safe with [`encode_tag`] and placed under
/// [`OCI_REF_PREFIX`].
fn oci_ref_path(name: &str) -> String {
    format!("{OCI_REF_PREFIX}{}", encode_tag(name))
}
781
/// Encode a tag name for safe filesystem storage.
///
/// Percent-encodes the two characters that would be ambiguous in a ref
/// file name:
/// - `%` becomes `%25`
/// - `/` becomes `%2F`
///
/// Every other character is copied through unchanged. The input is scanned
/// once, so an escape that was just emitted is never re-encoded.
fn encode_tag(name: &str) -> String {
    let mut encoded = String::with_capacity(name.len());
    for ch in name.chars() {
        match ch {
            '%' => encoded.push_str("%25"),
            '/' => encoded.push_str("%2F"),
            other => encoded.push(other),
        }
    }
    encoded
}
790
/// Decode a tag name from filesystem storage.
///
/// Inverse of the tag encoding: `%2F` becomes `/` and `%25` becomes `%`.
/// Decoding is a single left-to-right pass, so the output of one escape is
/// never re-interpreted — `%252F` decodes to `%2F`, not `/`.
///
/// A `%` that does not start one of the two known escapes (including a
/// truncated escape at the end of the input) is kept literally, and scanning
/// resumes at the very next character. This way a stray `%` cannot swallow
/// the beginning of a valid escape that follows it (`%%2F` → `%/`).
fn decode_tag(encoded: &str) -> String {
    let mut result = String::with_capacity(encoded.len());
    let mut rest = encoded;

    while let Some(pos) = rest.find('%') {
        // Everything before the '%' is literal.
        result.push_str(&rest[..pos]);
        let tail = &rest[pos..];

        if let Some(after) = tail.strip_prefix("%2F") {
            result.push('/');
            rest = after;
        } else if let Some(after) = tail.strip_prefix("%25") {
            result.push('%');
            rest = after;
        } else {
            // Unknown or truncated escape: keep the '%' and move on by
            // exactly one byte ('%' is ASCII, so this is a char boundary).
            result.push('%');
            rest = &tail[1..];
        }
    }

    result.push_str(rest);
    result
}
815
/// Computes sha256 content hash, returning an OCI `Digest`.
///
/// Thin local alias for `crate::sha256_content_digest`.
fn hash_sha256(bytes: &[u8]) -> OciDigest {
    crate::sha256_content_digest(bytes)
}
820
821// =============================================================================
822// Arbitrary Blob Storage (for OCI Artifacts)
823// =============================================================================
824
825/// Returns the content identifier for an arbitrary blob.
826pub fn blob_identifier(digest: &OciDigest) -> String {
827    format!("oci-blob-{digest}")
828}
829
830/// Writes an arbitrary blob to the repository.
831///
832/// This is used for OCI artifacts with non-tar media types. The blob is stored
833/// as an external object so that fsverity can be independently enabled on the
834/// raw content.
835///
836/// Returns (sha256 digest, fs-verity hash).
837pub fn write_blob<ObjectID: FsVerityHashValue>(
838    repo: &Arc<Repository<ObjectID>>,
839    data: &[u8],
840) -> Result<(OciDigest, ObjectID)> {
841    let digest = hash_sha256(data);
842    let content_id = blob_identifier(&digest);
843
844    if let Some(verity) = repo.has_stream(&content_id)? {
845        return Ok((digest, verity));
846    }
847
848    let mut stream = repo.create_stream(OCI_BLOB_CONTENT_TYPE)?;
849    stream.write_external(data)?;
850    let verity = repo.write_stream(stream, &content_id, None)?;
851
852    Ok((digest, verity))
853}
854
855/// Opens an arbitrary blob from the repository.
856///
857/// Returns the blob data. If verity is provided, it's used for fast lookup;
858/// otherwise, the content hash is verified against the digest.
859pub fn open_blob<ObjectID: FsVerityHashValue>(
860    repo: &Repository<ObjectID>,
861    digest: &OciDigest,
862    verity: Option<&ObjectID>,
863) -> Result<Vec<u8>> {
864    let content_id = blob_identifier(digest);
865    let (data, _named_refs) =
866        read_external_splitstream(repo, &content_id, verity, Some(OCI_BLOB_CONTENT_TYPE))?;
867
868    if verity.is_none() {
869        let computed = hash_sha256(&data);
870        ensure!(
871            *digest == computed,
872            "Blob integrity failed: expected {digest}, got {computed}"
873        );
874    }
875
876    Ok(data)
877}
878
879// =============================================================================
880// Referrer Index (for OCI Artifacts with subject field)
881// =============================================================================
882
/// Prefix for referrer index references.
///
/// Referrer refs are named `oci-referrers/{subject}/{artifact}` (both digests
/// passed through `encode_tag`); the removal helpers in this module address
/// them on disk under `streams/refs/`.
const REFERRER_REF_PREFIX: &str = "oci-referrers/";
885
886/// Records a referrer relationship: an artifact references a subject image.
887///
888/// Creates a symlink at `streams/refs/oci-referrers/{subject_digest}/{artifact_digest}`
889/// pointing to the artifact's manifest stream. This enables discovery of all artifacts
890/// that reference a given image (e.g. finding all signature artifacts for an image).
891///
892/// Both digests should be in the `sha256:...` format used by OCI.
893pub fn add_referrer<ObjectID: FsVerityHashValue>(
894    repo: &Repository<ObjectID>,
895    subject_digest: &OciDigest,
896    artifact_manifest_digest: &OciDigest,
897) -> Result<()> {
898    let subject_str: &str = subject_digest.as_ref();
899    let artifact_str: &str = artifact_manifest_digest.as_ref();
900    let ref_name = format!(
901        "{REFERRER_REF_PREFIX}{}/{}",
902        encode_tag(subject_str),
903        encode_tag(artifact_str)
904    );
905    let manifest_id = manifest_identifier(artifact_manifest_digest);
906    repo.name_stream(&manifest_id, &ref_name)
907}
908
909/// Lists all artifacts that reference the given subject manifest digest.
910///
911/// Returns `(artifact_manifest_digest, artifact_manifest_verity)` pairs for
912/// each artifact that declared the subject as its referrer. The digests are
913/// in `sha256:...` format.
914pub fn list_referrers<ObjectID: FsVerityHashValue>(
915    repo: &Repository<ObjectID>,
916    subject_digest: &OciDigest,
917) -> Result<Vec<(OciDigest, ObjectID)>> {
918    let subject_str: &str = subject_digest.as_ref();
919    let prefix = format!("{REFERRER_REF_PREFIX}{}", encode_tag(subject_str));
920
921    let mut referrers = Vec::new();
922
923    for (name, target) in repo.list_stream_refs(&prefix)? {
924        // The name is the encoded artifact manifest digest
925        let artifact_digest_str = decode_tag(&name);
926
927        // Extract verity from the symlink target — it points to
928        // a manifest stream path like "../../oci-manifest-sha256:abc..."
929        let manifest_part = target.rsplit('/').next().unwrap_or(&target);
930        if let Some(digest) = manifest_part.strip_prefix("oci-manifest-") {
931            // Verify consistency: the ref name should match the target
932            if digest != artifact_digest_str {
933                continue;
934            }
935        }
936
937        // Look up the verity for this manifest
938        let artifact_digest: OciDigest = artifact_digest_str
939            .parse()
940            .with_context(|| format!("Parsing referrer digest '{artifact_digest_str}'"))?;
941        match repo.has_stream(&manifest_identifier(&artifact_digest))? {
942            Some(verity) => referrers.push((artifact_digest, verity)),
943            None => {
944                continue;
945            }
946        }
947    }
948
949    Ok(referrers)
950}
951
952/// Removes a specific referrer index entry.
953///
954/// Idempotent — returns Ok if the entry doesn't exist.
955pub fn remove_referrer<ObjectID: FsVerityHashValue>(
956    repo: &Repository<ObjectID>,
957    subject_digest: &OciDigest,
958    artifact_digest: &OciDigest,
959) -> Result<()> {
960    let subject_str: &str = subject_digest.as_ref();
961    let artifact_str: &str = artifact_digest.as_ref();
962    let ref_path = format!(
963        "streams/refs/{REFERRER_REF_PREFIX}{}/{}",
964        encode_tag(subject_str),
965        encode_tag(artifact_str)
966    );
967    match unlinkat(repo.repo_fd(), &ref_path, AtFlags::empty()) {
968        Ok(()) => Ok(()),
969        Err(Errno::NOENT) => Ok(()),
970        Err(e) => Err(e).with_context(|| format!("Failed to remove referrer {artifact_digest}")),
971    }
972}
973
974/// Removes all referrer index entries for a subject.
975///
976/// Removes each referrer symlink and tries to remove the empty subject
977/// directory afterwards. Idempotent — returns Ok if no entries exist.
978pub fn remove_referrers_for_subject<ObjectID: FsVerityHashValue>(
979    repo: &Repository<ObjectID>,
980    subject_digest: &OciDigest,
981) -> Result<()> {
982    let referrers = list_referrers(repo, subject_digest)?;
983    for (artifact_digest, _verity) in &referrers {
984        remove_referrer(repo, subject_digest, artifact_digest)?;
985    }
986    // Try to remove the now-empty subject directory (ignore errors)
987    let subject_str: &str = subject_digest.as_ref();
988    let subject_dir = format!(
989        "streams/refs/{REFERRER_REF_PREFIX}{}",
990        encode_tag(subject_str)
991    );
992    let _ = unlinkat(repo.repo_fd(), &subject_dir, AtFlags::REMOVEDIR);
993    Ok(())
994}
995
996/// Removes referrer index entries whose subject manifest no longer exists.
997///
998/// When a subject image is untagged and garbage collected, its referrer
999/// artifacts become orphaned — their referrer symlinks under
1000/// `streams/refs/oci-referrers/{subject_digest}/` still act as GC roots,
1001/// preventing the artifact manifests from being collected.
1002///
1003/// Call this **before** running GC to ensure orphaned referrer artifacts
1004/// are also eligible for collection. The typical workflow is:
1005///
1006/// ```text
1007/// cleanup_dangling_referrers(&repo)?;
1008/// repo.gc(&[])?;
1009/// ```
1010///
1011/// Returns the number of referrer entries removed.
1012pub fn cleanup_dangling_referrers<ObjectID: FsVerityHashValue>(
1013    repo: &Repository<ObjectID>,
1014) -> Result<u64> {
1015    let referrers_path = format!("streams/refs/{REFERRER_REF_PREFIX}");
1016
1017    // Open the oci-referrers directory; if it doesn't exist, there's nothing to do
1018    let referrers_dir = match openat(
1019        repo.repo_fd(),
1020        &*referrers_path,
1021        OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC,
1022        Mode::empty(),
1023    ) {
1024        Ok(fd) => fd,
1025        Err(Errno::NOENT) => return Ok(0),
1026        Err(e) => return Err(e).context("Opening oci-referrers directory")?,
1027    };
1028
1029    let mut removed = 0u64;
1030
1031    // Collect subject directory names first to avoid borrowing issues
1032    let mut subject_dirs = Vec::new();
1033    for item in Dir::read_from(&referrers_dir).context("Reading oci-referrers directory")? {
1034        let entry = item.context("Reading oci-referrers entry")?;
1035        let name = entry.file_name();
1036        if name == c"." || name == c".." {
1037            continue;
1038        }
1039        if let Ok(s) = std::str::from_utf8(name.to_bytes()) {
1040            subject_dirs.push(s.to_string());
1041        }
1042    }
1043
1044    for encoded_subject in &subject_dirs {
1045        let subject_digest_str = decode_tag(encoded_subject);
1046        let subject_digest: OciDigest = subject_digest_str
1047            .parse()
1048            .with_context(|| format!("Parsing subject digest '{subject_digest_str}'"))?;
1049
1050        // Check if the subject manifest still exists in the repository
1051        if has_manifest(repo, &subject_digest)?.is_some() {
1052            continue;
1053        }
1054
1055        // Subject is gone — remove all referrer entries in this directory
1056        let subject_dir_fd = match openat(
1057            &referrers_dir,
1058            encoded_subject.as_str(),
1059            OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC,
1060            Mode::empty(),
1061        ) {
1062            Ok(fd) => fd,
1063            Err(Errno::NOENT) => continue,
1064            Err(e) => {
1065                return Err(e)
1066                    .context(format!("Opening referrer subject dir {encoded_subject}"))?;
1067            }
1068        };
1069
1070        for item in Dir::read_from(&subject_dir_fd).context("Reading referrer subject directory")? {
1071            let entry = item.context("Reading referrer entry")?;
1072            let name = entry.file_name();
1073            if name == c"." || name == c".." {
1074                continue;
1075            }
1076            unlinkat(&subject_dir_fd, name, AtFlags::empty())
1077                .with_context(|| format!("Removing referrer entry {name:?}"))?;
1078            removed += 1;
1079        }
1080
1081        // Remove the now-empty subject directory
1082        unlinkat(&referrers_dir, encoded_subject.as_str(), AtFlags::REMOVEDIR)
1083            .with_context(|| format!("Removing empty referrer subject dir {encoded_subject}"))?;
1084    }
1085
1086    Ok(removed)
1087}
1088
1089// =============================================================================
1090// Filesystem Consistency Checks (fsck)
1091// =============================================================================
1092
/// A structured error found during an OCI-level consistency check.
///
/// Each variant corresponds to a specific kind of OCI metadata integrity
/// problem. The `Display` implementation produces `fsck: ` followed by a
/// kebab-case error type, the image name/context and any relevant details.
///
/// When serialized, the variant is emitted as a kebab-case `type` tag; the
/// layer variants additionally rename their fields to camelCase (so
/// `diff_id` becomes `diffId`).
///
/// In every variant `name` is the image name the check was run for, and
/// `detail` (where present) is the string form of the underlying error.
#[derive(Debug, Clone, serde::Serialize, thiserror::Error)]
#[serde(tag = "type", rename_all = "kebab-case")]
#[non_exhaustive]
#[allow(missing_docs)]
pub enum OciFsckError {
    /// The manifest stream could not be read from the repository.
    #[error("fsck: manifest-read-failed: {name}: {detail}")]
    ManifestReadFailed { name: String, detail: String },

    /// The SHA-256 of the stored manifest (`actual`) differs from the digest
    /// the image refers to (`expected`).
    #[error("fsck: manifest-digest-mismatch: {name}: expected {expected}, got {actual}")]
    ManifestDigestMismatch {
        name: String,
        expected: String,
        actual: String,
    },

    /// The manifest bytes could not be parsed as an OCI image manifest.
    #[error("fsck: manifest-parse-failed: {name}: {detail}")]
    ManifestParseFailed { name: String, detail: String },

    /// The manifest stream has no `config:{digest}` named reference for the
    /// config descriptor listed in the manifest.
    #[error("fsck: config-ref-missing: {name}: {digest}")]
    ConfigRefMissing { name: String, digest: String },

    /// The config stream could not be read from the repository.
    #[error("fsck: config-read-failed: {name}: {detail}")]
    ConfigReadFailed { name: String, detail: String },

    /// The SHA-256 of the stored config (`actual`) differs from the digest in
    /// the manifest's config descriptor (`expected`).
    #[error("fsck: config-digest-mismatch: {name}: expected {expected}, got {actual}")]
    ConfigDigestMismatch {
        name: String,
        expected: String,
        actual: String,
    },

    /// The config of a container image could not be parsed as an OCI image
    /// configuration.
    #[error("fsck: config-parse-failed: {name}: {detail}")]
    ConfigParseFailed { name: String, detail: String },

    /// The config stream has no named reference for a `diff_id` listed in the
    /// config's rootfs.
    #[error("fsck: layer-ref-missing: {name}: {diff_id}")]
    #[serde(rename_all = "camelCase")]
    LayerRefMissing { name: String, diff_id: String },

    /// No layer stream exists in the repository for the given `diff_id`.
    #[error("fsck: layer-stream-missing: {name}: {diff_id}")]
    #[serde(rename_all = "camelCase")]
    LayerStreamMissing { name: String, diff_id: String },

    /// A layer could not be checked: either its `diff_id` is not a valid
    /// digest, or looking up its stream failed.
    #[error("fsck: layer-check-failed: {name}: {diff_id}: {detail}")]
    #[serde(rename_all = "camelCase")]
    LayerCheckFailed {
        name: String,
        diff_id: String,
        detail: String,
    },

    /// The object referenced for a layer by the config stream could not be
    /// opened.
    #[error("fsck: layer-object-missing: {name}: {diff_id}: {detail}")]
    #[serde(rename_all = "camelCase")]
    LayerObjectMissing {
        name: String,
        diff_id: String,
        detail: String,
    },

    /// The config carries a `containers.composefs.fsverity` annotation but the
    /// image it names (`digest`) could not be opened.
    #[error("fsck: seal-image-missing: {name}: {digest}: {detail}")]
    SealImageMissing {
        name: String,
        digest: String,
        detail: String,
    },

    /// For an artifact, the manifest stream has no named reference for a layer
    /// digest listed in the manifest.
    #[error("fsck: artifact-layer-ref-missing: {name}: {digest}")]
    ArtifactLayerRefMissing { name: String, digest: String },

    /// For an artifact, the object referenced for a layer by the manifest
    /// stream could not be opened.
    #[error("fsck: artifact-layer-object-missing: {name}: {digest}: {detail}")]
    ArtifactLayerObjectMissing {
        name: String,
        digest: String,
        detail: String,
    },

    /// The requested image name could not be resolved to a manifest.
    #[error("fsck: ref-resolve-failed: {name}: {detail}")]
    RefResolveFailed { name: String, detail: String },

    /// A tagged reference has a name starting with `@`, which is reserved for
    /// digest references.
    #[error("fsck: invalid-ref-name: {name}: leading '@' is reserved for digest references")]
    InvalidRefName { name: String },
}
1179
/// Results from an OCI-level filesystem consistency check.
///
/// Returned by [`oci_fsck`] and [`oci_fsck_image`] to report integrity status
/// of OCI images stored in the repository. This includes checks at both the
/// OCI metadata level (manifest/config digests, layer references) and the
/// underlying repository level (object integrity, splitstream validity).
#[derive(Debug, Clone, Default, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct OciFsckResult {
    // Outcome of the underlying repository-level fsck.
    pub(crate) repo_result: composefs::repository::FsckResult,
    // Number of images examined, including refs rejected before any content
    // was read (invalid name, unresolvable ref).
    pub(crate) images_checked: u64,
    // Number of examined images with at least one error; each image is
    // counted at most once.
    pub(crate) images_corrupted: u64,
    // Every OCI-level error found, across all examined images.
    pub(crate) errors: Vec<OciFsckError>,
}
1194
1195impl OciFsckResult {
1196    /// Returns true if no corruption or errors were found at any level.
1197    pub fn is_ok(&self) -> bool {
1198        debug_assert!(
1199            self.images_corrupted == 0 || !self.errors.is_empty(),
1200            "images_corrupted is non-zero but no OCI error messages recorded"
1201        );
1202        self.repo_result.is_ok() && self.errors.is_empty()
1203    }
1204
1205    /// Results from the underlying repository fsck.
1206    pub fn repo_result(&self) -> &composefs::repository::FsckResult {
1207        &self.repo_result
1208    }
1209
1210    /// Number of OCI images checked.
1211    pub fn images_checked(&self) -> u64 {
1212        self.images_checked
1213    }
1214
1215    /// Number of OCI images with issues.
1216    pub fn images_corrupted(&self) -> u64 {
1217        self.images_corrupted
1218    }
1219
1220    /// OCI-level errors found during the check.
1221    pub fn errors(&self) -> &[OciFsckError] {
1222        &self.errors
1223    }
1224}
1225
1226impl std::fmt::Display for OciFsckResult {
1227    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1228        write!(f, "{}", self.repo_result)?;
1229        writeln!(
1230            f,
1231            "oci images: {}/{} ok",
1232            self.images_checked.saturating_sub(self.images_corrupted),
1233            self.images_checked
1234        )?;
1235        if !self.errors.is_empty() {
1236            writeln!(f, "oci errors: {}", self.errors.len())?;
1237            for err in &self.errors {
1238                writeln!(f, "  - {err}")?;
1239            }
1240        }
1241        Ok(())
1242    }
1243}
1244
1245/// Run a full OCI-aware consistency check on the repository.
1246///
1247/// This performs the underlying repository fsck (object integrity, splitstream
1248/// validation, symlink checks) and then additionally validates all tagged OCI
1249/// images: manifest digest verification, config digest verification, layer
1250/// reference existence, and seal consistency.
1251pub async fn oci_fsck<ObjectID: FsVerityHashValue>(
1252    repo: &Repository<ObjectID>,
1253) -> Result<OciFsckResult> {
1254    let repo_result = repo.fsck().await?;
1255    let mut result = OciFsckResult {
1256        repo_result,
1257        ..Default::default()
1258    };
1259
1260    // Check all tagged OCI images
1261    let refs = list_refs(repo).context("listing OCI refs")?;
1262    for (name, manifest_digest) in refs {
1263        if name.starts_with('@') {
1264            result.images_checked += 1;
1265            result.images_corrupted += 1;
1266            result
1267                .errors
1268                .push(OciFsckError::InvalidRefName { name: name.clone() });
1269            continue;
1270        }
1271        fsck_single_image(repo, &name, &manifest_digest, &mut result);
1272    }
1273
1274    Ok(result)
1275}
1276
1277/// Run an OCI-aware consistency check on a single image by tag name.
1278///
1279/// Performs the underlying repository fsck, then validates the specified image.
1280pub async fn oci_fsck_image<ObjectID: FsVerityHashValue>(
1281    repo: &Repository<ObjectID>,
1282    name: &str,
1283) -> Result<OciFsckResult> {
1284    let repo_result = repo.fsck().await?;
1285    let mut result = OciFsckResult {
1286        repo_result,
1287        ..Default::default()
1288    };
1289
1290    let (manifest_digest, _verity) = match resolve_ref(repo, name) {
1291        Ok(v) => v,
1292        Err(e) => {
1293            result.images_corrupted += 1;
1294            result.images_checked += 1;
1295            result.errors.push(OciFsckError::RefResolveFailed {
1296                name: name.to_string(),
1297                detail: e.to_string(),
1298            });
1299            return Ok(result);
1300        }
1301    };
1302
1303    fsck_single_image(repo, name, &manifest_digest, &mut result);
1304    Ok(result)
1305}
1306
1307/// Internal: validate a single OCI image's metadata integrity.
1308fn fsck_single_image<ObjectID: FsVerityHashValue>(
1309    repo: &Repository<ObjectID>,
1310    name: &str,
1311    manifest_digest: &OciDigest,
1312    result: &mut OciFsckResult,
1313) {
1314    result.images_checked += 1;
1315    let error_count_before = result.errors.len();
1316
1317    // 1. Verify manifest content hash
1318    let manifest_id = manifest_identifier(manifest_digest);
1319    let (manifest_data, manifest_named_refs) = match read_external_splitstream(
1320        repo,
1321        &manifest_id,
1322        None,
1323        Some(OCI_MANIFEST_CONTENT_TYPE),
1324    ) {
1325        Ok(v) => v,
1326        Err(e) => {
1327            result.images_corrupted += 1;
1328            result.errors.push(OciFsckError::ManifestReadFailed {
1329                name: name.to_string(),
1330                detail: e.to_string(),
1331            });
1332            return;
1333        }
1334    };
1335
1336    let computed_digest = hash_sha256(&manifest_data);
1337    if *manifest_digest != computed_digest {
1338        result.images_corrupted += 1;
1339        result.errors.push(OciFsckError::ManifestDigestMismatch {
1340            name: name.to_string(),
1341            expected: manifest_digest.to_string(),
1342            actual: computed_digest.to_string(),
1343        });
1344        return;
1345    }
1346
1347    // 2. Parse manifest
1348    let manifest = match ImageManifest::from_reader(&manifest_data[..]) {
1349        Ok(m) => m,
1350        Err(e) => {
1351            result.images_corrupted += 1;
1352            result.errors.push(OciFsckError::ManifestParseFailed {
1353                name: name.to_string(),
1354                detail: e.to_string(),
1355            });
1356            return;
1357        }
1358    };
1359
1360    // 3. Verify config reference exists in manifest's named refs
1361    let config_digest = manifest.config().digest().clone();
1362    let config_key = format!("config:{config_digest}");
1363    let config_verity = match manifest_named_refs.get(config_key.as_str()) {
1364        Some(v) => v.clone(),
1365        None => {
1366            result.images_corrupted += 1;
1367            result.errors.push(OciFsckError::ConfigRefMissing {
1368                name: name.to_string(),
1369                digest: config_digest.to_string(),
1370            });
1371            return;
1372        }
1373    };
1374
1375    // 4. Verify config content hash
1376    let config_id = crate::config_identifier(&config_digest);
1377    let (config_data, config_named_refs) = match read_external_splitstream(
1378        repo,
1379        &config_id,
1380        Some(&config_verity),
1381        Some(OCI_CONFIG_CONTENT_TYPE),
1382    ) {
1383        Ok(v) => v,
1384        Err(e) => {
1385            result.images_corrupted += 1;
1386            result.errors.push(OciFsckError::ConfigReadFailed {
1387                name: name.to_string(),
1388                detail: e.to_string(),
1389            });
1390            return;
1391        }
1392    };
1393
1394    let computed_config = hash_sha256(&config_data);
1395    if config_digest != computed_config {
1396        result.images_corrupted += 1;
1397        result.errors.push(OciFsckError::ConfigDigestMismatch {
1398            name: name.to_string(),
1399            expected: config_digest.to_string(),
1400            actual: computed_config.to_string(),
1401        });
1402        return;
1403    }
1404
1405    // 5. Parse config and verify layer references
1406    let is_container = matches!(manifest.config().media_type(), MediaType::ImageConfig);
1407
1408    if is_container {
1409        let config = match ImageConfiguration::from_reader(&config_data[..]) {
1410            Ok(c) => c,
1411            Err(e) => {
1412                result.images_corrupted += 1;
1413                result.errors.push(OciFsckError::ConfigParseFailed {
1414                    name: name.to_string(),
1415                    detail: e.to_string(),
1416                });
1417                return;
1418            }
1419        };
1420
1421        // Verify each layer diff_id has a corresponding named ref and stream
1422        for diff_id_str in config.rootfs().diff_ids() {
1423            let layer_verity = match config_named_refs.get(diff_id_str.as_str()) {
1424                Some(v) => v,
1425                None => {
1426                    result.errors.push(OciFsckError::LayerRefMissing {
1427                        name: name.to_string(),
1428                        diff_id: diff_id_str.to_string(),
1429                    });
1430                    continue;
1431                }
1432            };
1433
1434            let diff_id: OciDigest = match diff_id_str.parse() {
1435                Ok(d) => d,
1436                Err(e) => {
1437                    result.errors.push(OciFsckError::LayerCheckFailed {
1438                        name: name.to_string(),
1439                        diff_id: diff_id_str.to_string(),
1440                        detail: format!("Invalid diff_id: {e}"),
1441                    });
1442                    continue;
1443                }
1444            };
1445
1446            // Check the layer stream exists
1447            let layer_id = crate::layer_identifier(&diff_id);
1448            match repo.has_stream(&layer_id) {
1449                Ok(Some(_)) => {}
1450                Ok(None) => {
1451                    result.errors.push(OciFsckError::LayerStreamMissing {
1452                        name: name.to_string(),
1453                        diff_id: diff_id.to_string(),
1454                    });
1455                }
1456                Err(e) => {
1457                    result.errors.push(OciFsckError::LayerCheckFailed {
1458                        name: name.to_string(),
1459                        diff_id: diff_id.to_string(),
1460                        detail: e.to_string(),
1461                    });
1462                }
1463            }
1464
1465            // Verify the layer's object exists
1466            match repo.open_object(layer_verity) {
1467                Ok(_) => {}
1468                Err(e) => {
1469                    result.errors.push(OciFsckError::LayerObjectMissing {
1470                        name: name.to_string(),
1471                        diff_id: diff_id.to_string(),
1472                        detail: e.to_string(),
1473                    });
1474                }
1475            }
1476        }
1477
1478        // 6. If sealed, verify the seal image exists
1479        if let Some(seal_digest) = config.get_config_annotation("containers.composefs.fsverity") {
1480            match repo.open_image(seal_digest) {
1481                Ok(_) => {}
1482                Err(e) => {
1483                    result.errors.push(OciFsckError::SealImageMissing {
1484                        name: name.to_string(),
1485                        digest: seal_digest.to_string(),
1486                        detail: e.to_string(),
1487                    });
1488                }
1489            }
1490        }
1491    } else {
1492        // Artifact: verify layer references from manifest named refs
1493        for layer_desc in manifest.layers() {
1494            let layer_digest = layer_desc.digest().to_string();
1495            match manifest_named_refs.get(layer_digest.as_str()) {
1496                Some(verity) => {
1497                    // Verify the layer object exists
1498                    match repo.open_object(verity) {
1499                        Ok(_) => {}
1500                        Err(e) => {
1501                            result
1502                                .errors
1503                                .push(OciFsckError::ArtifactLayerObjectMissing {
1504                                    name: name.to_string(),
1505                                    digest: layer_digest,
1506                                    detail: e.to_string(),
1507                                });
1508                        }
1509                    }
1510                }
1511                None => {
1512                    result.errors.push(OciFsckError::ArtifactLayerRefMissing {
1513                        name: name.to_string(),
1514                        digest: layer_digest,
1515                    });
1516                }
1517            }
1518        }
1519    }
1520
1521    // Count at most once per image
1522    if result.errors.len() > error_count_before {
1523        result.images_corrupted += 1;
1524    }
1525}
1526
1527// =============================================================================
1528// Layer Inspection
1529// =============================================================================
1530
/// Metadata about a layer stored in the repository.
///
/// Produced by `layer_info`. Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct LayerInfo {
    /// The layer diff_id (sha256 hash of uncompressed content)
    pub diff_id: String,
    /// The fs-verity hash of the layer splitstream, hex-encoded
    pub verity: String,
    /// Size of the uncompressed tar layer in bytes, as recorded in the
    /// splitstream's total size
    pub size: u64,
    /// Number of files/entries in the layer
    pub entry_count: usize,
    /// Splitstream metadata
    pub splitstream: SplitstreamInfo,
}
1546
/// Metadata about the splitstream representation of a layer.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SplitstreamInfo {
    /// Number of external object references (large files stored separately)
    pub external_objects: usize,
    /// Total size of external objects in bytes (sum of the sizes of the
    /// layer's externally stored regular files)
    pub external_size: u64,
    /// Size of inline data in bytes (small files + tar headers), computed as
    /// the layer size minus `external_size`, saturating at zero
    pub inline_size: u64,
}
1558
1559/// Opens a layer by its diff_id and returns metadata about it.
1560///
1561/// The diff_id should be in the `sha256:...` format used by OCI.
1562pub fn layer_info<ObjectID: FsVerityHashValue>(
1563    repo: &Repository<ObjectID>,
1564    diff_id: &OciDigest,
1565) -> Result<LayerInfo> {
1566    let content_id = crate::layer_identifier(diff_id);
1567    let verity = repo
1568        .has_stream(&content_id)?
1569        .with_context(|| format!("Layer {diff_id} not found"))?;
1570
1571    let mut stream = repo.open_stream(
1572        &content_id,
1573        Some(&verity),
1574        Some(crate::skopeo::TAR_LAYER_CONTENT_TYPE),
1575    )?;
1576
1577    // Get the total size from the splitstream header (this is the merged/tar size)
1578    let size = stream.total_size;
1579
1580    // Count external object references (this doesn't consume the stream)
1581    let mut external_objects = 0usize;
1582    stream.get_object_refs(|_| external_objects += 1)?;
1583
1584    // Iterate entries and gather sizes
1585    let mut entry_count = 0usize;
1586    let mut external_size = 0u64;
1587
1588    while let Some(entry) = crate::tar::get_entry(&mut stream)? {
1589        entry_count += 1;
1590        if let crate::tar::TarItem::Leaf(composefs::tree::LeafContent::Regular(
1591            composefs::tree::RegularFile::External(_, file_size),
1592        )) = entry.item
1593        {
1594            external_size += file_size;
1595        }
1596    }
1597
1598    // inline_size includes tar headers, small files, and other metadata
1599    let inline_size = size.saturating_sub(external_size);
1600
1601    Ok(LayerInfo {
1602        diff_id: diff_id.to_string(),
1603        verity: verity.to_hex(),
1604        size,
1605        entry_count,
1606        splitstream: SplitstreamInfo {
1607            external_objects,
1608            external_size,
1609            inline_size,
1610        },
1611    })
1612}
1613
1614/// Writes the layer contents in composefs dumpfile format.
1615///
1616/// Each entry is written on its own line in the composefs dumpfile format,
1617/// which includes path, size, mode, ownership, timestamps, and content references.
1618pub fn layer_dumpfile<ObjectID: FsVerityHashValue>(
1619    repo: &Repository<ObjectID>,
1620    diff_id: &OciDigest,
1621    output: &mut impl std::io::Write,
1622) -> Result<()> {
1623    let content_id = crate::layer_identifier(diff_id);
1624    let verity = repo
1625        .has_stream(&content_id)?
1626        .with_context(|| format!("Layer {diff_id} not found"))?;
1627
1628    let mut stream = repo.open_stream(
1629        &content_id,
1630        Some(&verity),
1631        Some(crate::skopeo::TAR_LAYER_CONTENT_TYPE),
1632    )?;
1633
1634    while let Some(entry) = crate::tar::get_entry(&mut stream)? {
1635        writeln!(output, "{entry}")?;
1636    }
1637
1638    Ok(())
1639}
1640
1641/// Reconstitutes and writes the original tar layer.
1642///
1643/// This merges the splitstream back into the original tar format by
1644/// combining inline data with external object references.
1645pub fn layer_tar<ObjectID: FsVerityHashValue>(
1646    repo: &Repository<ObjectID>,
1647    diff_id: &OciDigest,
1648    output: &mut impl std::io::Write,
1649) -> Result<()> {
1650    let content_id = crate::layer_identifier(diff_id);
1651    let verity = repo
1652        .has_stream(&content_id)?
1653        .with_context(|| format!("Layer {diff_id} not found"))?;
1654
1655    repo.merge_splitstream(
1656        &content_id,
1657        Some(&verity),
1658        Some(crate::skopeo::TAR_LAYER_CONTENT_TYPE),
1659        output,
1660    )
1661}
1662
1663#[cfg(test)]
1664mod test {
1665    use super::*;
1666    use composefs::fsverity::Sha256HashValue;
1667    use composefs::test::TestRepo;
1668    use containers_image_proxy::oci_spec::image::{
1669        ConfigBuilder, DescriptorBuilder, ImageConfigurationBuilder, ImageManifestBuilder,
1670        RootFsBuilder,
1671    };
1672    use std::fs::File;
1673    use std::io::Read;
1674
1675    /// Helper to create a synthetic container image in the repository.
1676    ///
1677    /// Creates a minimal but valid container image with:
1678    /// - A single "layer" (stored as an external object)
1679    /// - Proper OCI manifest and config structure
1680    /// - Optional tag
1681    ///
1682    /// Returns (manifest_digest, manifest_verity, config_digest).
1683    fn create_test_image(
1684        repo: &Arc<Repository<Sha256HashValue>>,
1685        tag: Option<&str>,
1686        arch: &str,
1687    ) -> (OciDigest, Sha256HashValue, OciDigest) {
1688        // Create a fake layer - in real usage this would be a tar splitstream
1689        // For testing the manifest/config storage, we just need valid references
1690        let layer_data = format!("fake-layer-{arch}").into_bytes();
1691        let layer_digest = hash_sha256(&layer_data);
1692
1693        let mut layer_stream = repo
1694            .create_stream(crate::skopeo::TAR_LAYER_CONTENT_TYPE)
1695            .unwrap();
1696        layer_stream.write_external(&layer_data).unwrap();
1697        let layer_verity = repo
1698            .write_stream(layer_stream, &crate::layer_identifier(&layer_digest), None)
1699            .unwrap();
1700
1701        let rootfs = RootFsBuilder::default()
1702            .typ("layers")
1703            .diff_ids(vec![layer_digest.to_string()])
1704            .build()
1705            .unwrap();
1706
1707        let cfg = ConfigBuilder::default().build().unwrap();
1708
1709        let config = ImageConfigurationBuilder::default()
1710            .architecture(arch)
1711            .os("linux")
1712            .rootfs(rootfs)
1713            .config(cfg)
1714            .build()
1715            .unwrap();
1716
1717        let config_json = config.to_string().unwrap();
1718        let config_digest = hash_sha256(config_json.as_bytes());
1719
1720        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
1721        config_stream.add_named_stream_ref(layer_digest.as_ref(), &layer_verity);
1722        config_stream
1723            .write_external(config_json.as_bytes())
1724            .unwrap();
1725        let config_verity = repo
1726            .write_stream(
1727                config_stream,
1728                &crate::config_identifier(&config_digest),
1729                None,
1730            )
1731            .unwrap();
1732
1733        let config_descriptor = DescriptorBuilder::default()
1734            .media_type(MediaType::ImageConfig)
1735            .digest(config_digest.clone())
1736            .size(config_json.len() as u64)
1737            .build()
1738            .unwrap();
1739
1740        let layer_descriptor = DescriptorBuilder::default()
1741            .media_type(MediaType::ImageLayerGzip)
1742            .digest(layer_digest.clone())
1743            .size(layer_data.len() as u64)
1744            .build()
1745            .unwrap();
1746
1747        let manifest = ImageManifestBuilder::default()
1748            .schema_version(2u32)
1749            .media_type(MediaType::ImageManifest)
1750            .config(config_descriptor)
1751            .layers(vec![layer_descriptor])
1752            .build()
1753            .unwrap();
1754
1755        let layer_verities = [(layer_digest, layer_verity)];
1756
1757        let manifest_json = manifest.to_string().unwrap();
1758        let manifest_digest = hash_sha256(manifest_json.as_bytes());
1759
1760        let (_stored_digest, manifest_verity) = write_manifest(
1761            repo,
1762            &manifest,
1763            &manifest_digest,
1764            &config_verity,
1765            &layer_verities,
1766            tag,
1767        )
1768        .unwrap();
1769
1770        (manifest_digest, manifest_verity, config_digest)
1771    }
1772
1773    #[test]
1774    fn test_manifest_identifier() {
1775        let digest: OciDigest =
1776            "sha256:abc1230000000000000000000000000000000000000000000000000000000000"
1777                .parse()
1778                .unwrap();
1779        assert_eq!(
1780            manifest_identifier(&digest),
1781            "oci-manifest-sha256:abc1230000000000000000000000000000000000000000000000000000000000"
1782        );
1783    }
1784
1785    #[test]
1786    fn test_oci_ref_path() {
1787        assert_eq!(oci_ref_path("myimage:latest"), "oci/myimage:latest");
1788        // Slashes get encoded
1789        assert_eq!(oci_ref_path("library/nginx"), "oci/library%2Fnginx");
1790        assert_eq!(oci_ref_path("docker://busybox"), "oci/docker:%2F%2Fbusybox");
1791    }
1792
1793    #[test]
1794    fn test_encode_decode_tag() {
1795        // Simple names pass through
1796        assert_eq!(encode_tag("myimage:latest"), "myimage:latest");
1797        assert_eq!(decode_tag("myimage:latest"), "myimage:latest");
1798
1799        // Slashes get encoded
1800        assert_eq!(encode_tag("library/nginx"), "library%2Fnginx");
1801        assert_eq!(decode_tag("library%2Fnginx"), "library/nginx");
1802
1803        // Double slashes
1804        assert_eq!(encode_tag("docker://busybox"), "docker:%2F%2Fbusybox");
1805        assert_eq!(decode_tag("docker:%2F%2Fbusybox"), "docker://busybox");
1806
1807        // Percent signs get encoded first to avoid conflicts
1808        assert_eq!(encode_tag("test%2F"), "test%252F");
1809        assert_eq!(decode_tag("test%252F"), "test%2F");
1810
1811        // Round-trip including tricky inputs where order-dependent
1812        // replacement would produce wrong results
1813        let names = [
1814            "simple",
1815            "with:tag",
1816            "registry.io/image:v1",
1817            "docker://busybox:latest",
1818            "containers-storage:myimage",
1819            "weird%name/with/slashes",
1820            "%2F",
1821            "a/b%c",
1822            "100%",
1823            "normal:tag",
1824            "%25already-encoded",
1825            "double%%percent",
1826        ];
1827        for name in names {
1828            assert_eq!(
1829                decode_tag(&encode_tag(name)),
1830                name,
1831                "round-trip failed for {name}"
1832            );
1833        }
1834    }
1835
1836    #[test]
1837    fn test_hash_sha256() {
1838        assert_eq!(
1839            hash_sha256(b"hello world").as_ref(),
1840            "sha256:b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
1841        );
1842    }
1843
1844    #[test]
1845    fn test_blob_identifier() {
1846        let digest: OciDigest =
1847            "sha256:abc1230000000000000000000000000000000000000000000000000000000000"
1848                .parse()
1849                .unwrap();
1850        assert_eq!(
1851            blob_identifier(&digest),
1852            "oci-blob-sha256:abc1230000000000000000000000000000000000000000000000000000000000"
1853        );
1854    }
1855
1856    #[test]
1857    fn test_write_and_read_blob() {
1858        let test_repo = TestRepo::<Sha256HashValue>::new();
1859        let repo = &test_repo.repo;
1860
1861        let data = b"This is some arbitrary blob data for an OCI artifact.";
1862        let (digest, verity) = write_blob(repo, data).unwrap();
1863
1864        assert!(digest.as_ref().starts_with("sha256:"));
1865
1866        // Read back with verity (fast path)
1867        let read_data = open_blob(&repo, &digest, Some(&verity)).unwrap();
1868        assert_eq!(read_data, data);
1869
1870        // Read back without verity (verifies content hash)
1871        let read_data2 = open_blob(&repo, &digest, None).unwrap();
1872        assert_eq!(read_data2, data);
1873    }
1874
1875    #[test]
1876    fn test_write_blob_deduplication() {
1877        let test_repo = TestRepo::<Sha256HashValue>::new();
1878        let repo = &test_repo.repo;
1879
1880        let data = b"duplicate blob content";
1881
1882        let (digest1, verity1) = write_blob(repo, data).unwrap();
1883        let (digest2, verity2) = write_blob(repo, data).unwrap();
1884
1885        assert_eq!(digest1, digest2);
1886        assert_eq!(verity1, verity2);
1887    }
1888
1889    #[test]
1890    fn test_open_blob_bad_digest() {
1891        let test_repo = TestRepo::<Sha256HashValue>::new();
1892        let repo = &test_repo.repo;
1893
1894        let data = b"some blob data";
1895        let (_digest, _verity) = write_blob(repo, data).unwrap();
1896
1897        let bad_digest: OciDigest =
1898            "sha256:0000000000000000000000000000000000000000000000000000000000000000"
1899                .parse()
1900                .unwrap();
1901        let result = open_blob::<Sha256HashValue>(&repo, &bad_digest, None);
1902        assert!(result.is_err());
1903    }
1904
1905    /// Verify that manifest JSON is stored as an external object, not inline.
1906    ///
1907    /// External storage gives each manifest its own file in objects/, allowing
1908    /// fsverity to be independently enabled on the raw content. This is a
1909    /// prerequisite for signing: a signature can reference the fsverity digest
1910    /// of the manifest bytes directly.
1911    #[test]
1912    fn test_manifest_stored_as_external_object() {
1913        let test_repo = TestRepo::<Sha256HashValue>::new();
1914        let repo = &test_repo.repo;
1915
1916        let (manifest_digest, manifest_verity, _) =
1917            create_test_image(repo, Some("ext-test"), "amd64");
1918
1919        let manifest_id = manifest_identifier(&manifest_digest);
1920        let mut stream = repo
1921            .open_stream(&manifest_id, Some(&manifest_verity), None)
1922            .unwrap();
1923
1924        let mut object_refs = Vec::new();
1925        stream
1926            .get_object_refs(|id| object_refs.push(id.clone()))
1927            .unwrap();
1928
1929        // Should have at least one external object (the manifest JSON itself)
1930        assert!(
1931            !object_refs.is_empty(),
1932            "Manifest splitstream should contain external object references"
1933        );
1934
1935        let img = OciImage::open(&repo, &manifest_digest, Some(&manifest_verity)).unwrap();
1936        let manifest_json = img.manifest().to_string().unwrap();
1937        let expected_verity: Sha256HashValue =
1938            composefs::fsverity::compute_verity(manifest_json.as_bytes());
1939
1940        assert!(
1941            object_refs.contains(&expected_verity),
1942            "Manifest JSON fsverity digest should appear in splitstream object refs"
1943        );
1944    }
1945
1946    /// Verify that blob content is stored as an external object.
1947    #[test]
1948    fn test_blob_stored_as_external_object() {
1949        let test_repo = TestRepo::<Sha256HashValue>::new();
1950        let repo = &test_repo.repo;
1951
1952        let data = b"artifact blob content for external storage test";
1953        let (digest, verity) = write_blob(repo, data).unwrap();
1954
1955        let content_id = blob_identifier(&digest);
1956        let mut stream = repo.open_stream(&content_id, Some(&verity), None).unwrap();
1957
1958        let mut object_refs = Vec::new();
1959        stream
1960            .get_object_refs(|id| object_refs.push(id.clone()))
1961            .unwrap();
1962
1963        assert_eq!(
1964            object_refs.len(),
1965            1,
1966            "Blob should be stored as exactly one external object"
1967        );
1968
1969        let expected_verity: Sha256HashValue = composefs::fsverity::compute_verity(data);
1970        assert_eq!(
1971            object_refs[0], expected_verity,
1972            "External object verity should match independently computed verity of blob data"
1973        );
1974    }
1975
1976    /// Test storing and retrieving an OCI artifact with non-tar media type.
1977    ///
1978    /// This simulates what would happen when storing something like a
1979    /// Helm chart, WASM module, or other non-container artifact.
1980    #[test]
1981    fn test_oci_artifact_roundtrip() {
1982        use containers_image_proxy::oci_spec::image::{DescriptorBuilder, ImageManifestBuilder};
1983
1984        let test_repo = TestRepo::<Sha256HashValue>::new();
1985        let repo = &test_repo.repo;
1986
1987        // Create an artifact with a custom media type (simulating a WASM module)
1988        let wasm_bytes = b"\x00asm\x01\x00\x00\x00"; // WASM magic header
1989        let (blob_digest, blob_verity) = write_blob(repo, wasm_bytes).unwrap();
1990
1991        // Create an empty config (common for artifacts)
1992        let empty_config = b"{}";
1993        let config_digest = hash_sha256(empty_config);
1994
1995        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
1996        config_stream.write_external(empty_config).unwrap();
1997        let config_verity = repo
1998            .write_stream(
1999                config_stream,
2000                &crate::config_identifier(&config_digest),
2001                None,
2002            )
2003            .unwrap();
2004
2005        let config_descriptor = DescriptorBuilder::default()
2006            .media_type(MediaType::Other(
2007                "application/vnd.wasm.config.v1+json".to_string(),
2008            ))
2009            .digest(config_digest.clone())
2010            .size(empty_config.len() as u64)
2011            .build()
2012            .unwrap();
2013
2014        let blob_descriptor = DescriptorBuilder::default()
2015            .media_type(MediaType::Other("application/wasm".to_string()))
2016            .digest(blob_digest.clone())
2017            .size(wasm_bytes.len() as u64)
2018            .build()
2019            .unwrap();
2020
2021        let manifest = ImageManifestBuilder::default()
2022            .schema_version(2u32)
2023            .media_type(MediaType::ImageManifest)
2024            .config(config_descriptor)
2025            .layers(vec![blob_descriptor])
2026            .build()
2027            .unwrap();
2028
2029        // For artifacts, we use the blob digest as the "diff_id" equivalent
2030        let layer_verities = [(blob_digest.clone(), blob_verity.clone())];
2031
2032        let manifest_json = manifest.to_string().unwrap();
2033        let manifest_digest = hash_sha256(manifest_json.as_bytes());
2034
2035        let (stored_digest, manifest_verity) = write_manifest(
2036            &repo,
2037            &manifest,
2038            &manifest_digest,
2039            &config_verity,
2040            &layer_verities,
2041            Some("my-wasm-artifact:v1"),
2042        )
2043        .unwrap();
2044
2045        assert_eq!(stored_digest, manifest_digest);
2046
2047        let opened = OciImage::open(&repo, &manifest_digest, Some(&manifest_verity)).unwrap();
2048
2049        assert!(!opened.is_container_image()); // Not a container image
2050        assert_eq!(opened.manifest_digest(), &manifest_digest);
2051        assert_eq!(opened.config_digest(), &config_digest);
2052        assert_eq!(opened.layer_descriptors().len(), 1);
2053        assert_eq!(
2054            opened.layer_descriptors()[0].media_type(),
2055            &MediaType::Other("application/wasm".to_string())
2056        );
2057
2058        let by_tag = OciImage::open_ref(&repo, "my-wasm-artifact:v1").unwrap();
2059        assert_eq!(by_tag.manifest_digest(), &manifest_digest);
2060
2061        let images = list_images(&repo).unwrap();
2062        assert_eq!(images.len(), 1);
2063        assert_eq!(images[0].name, "my-wasm-artifact:v1");
2064        assert!(!images[0].is_container);
2065
2066        let read_wasm = open_blob(&repo, &blob_digest, Some(&blob_verity)).unwrap();
2067        assert_eq!(read_wasm, wasm_bytes);
2068    }
2069
2070    /// Test the OCI 1.1 empty config artifact pattern from the spec:
2071    /// config is `application/vnd.oci.empty.v1+json`, layers use custom
2072    /// media types, and layer digests are used as diff_ids.
2073    /// See: https://github.com/opencontainers/image-spec/blob/main/artifacts-guidance.md
2074    #[test]
2075    fn test_oci_artifact_empty_config() {
2076        let test_repo = TestRepo::<Sha256HashValue>::new();
2077        let repo = &test_repo.repo;
2078
2079        let sbom_data = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
2080        let layer_digest = hash_sha256(sbom_data);
2081
2082        // Store the raw layer as an object with external ref splitstream
2083        let blob_object_id = repo.ensure_object(sbom_data).unwrap();
2084        let layer_content_id = crate::layer_identifier(&layer_digest);
2085        let mut layer_stream = repo
2086            .create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE)
2087            .unwrap();
2088        layer_stream.add_external_size(sbom_data.len() as u64);
2089        layer_stream
2090            .write_reference(blob_object_id.clone())
2091            .unwrap();
2092        let layer_verity = repo
2093            .write_stream(layer_stream, &layer_content_id, None)
2094            .unwrap();
2095
2096        // The OCI 1.1 empty config: `{}` with the well-known digest
2097        let empty_config = b"{}";
2098        let config_digest = hash_sha256(empty_config);
2099        assert_eq!(
2100            config_digest.as_ref(),
2101            "sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a"
2102        );
2103
2104        // Store the config — for artifacts we still write it as a config
2105        // splitstream, but it contains no diff_ids-derived named refs.
2106        // Instead, the layer refs come from the manifest layer digests.
2107        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
2108        config_stream.write_external(empty_config).unwrap();
2109        let config_verity = repo
2110            .write_stream(
2111                config_stream,
2112                &crate::config_identifier(&config_digest),
2113                None,
2114            )
2115            .unwrap();
2116
2117        let config_descriptor = DescriptorBuilder::default()
2118            .media_type(MediaType::EmptyJSON)
2119            .digest(config_digest.clone())
2120            .size(empty_config.len() as u64)
2121            .build()
2122            .unwrap();
2123
2124        let layer_descriptor = DescriptorBuilder::default()
2125            .media_type(MediaType::Other("text/spdx+json".to_string()))
2126            .digest(layer_digest.clone())
2127            .size(sbom_data.len() as u64)
2128            .build()
2129            .unwrap();
2130
2131        let manifest = ImageManifestBuilder::default()
2132            .schema_version(2u32)
2133            .media_type(MediaType::ImageManifest)
2134            .config(config_descriptor.clone())
2135            .layers(vec![layer_descriptor])
2136            .build()
2137            .unwrap();
2138
2139        assert_ne!(*config_descriptor.media_type(), MediaType::ImageConfig);
2140
2141        // Store manifest — layer_verities uses the layer digest as key
2142        // (same logic as ensure_config_with_layers when !is_image_config)
2143        let layer_verities = [(layer_digest.clone(), layer_verity.clone())];
2144
2145        let manifest_json = manifest.to_string().unwrap();
2146        let manifest_digest = hash_sha256(manifest_json.as_bytes());
2147
2148        let (_stored_digest, manifest_verity) = write_manifest(
2149            &repo,
2150            &manifest,
2151            &manifest_digest,
2152            &config_verity,
2153            &layer_verities,
2154            Some("my-sbom:v1"),
2155        )
2156        .unwrap();
2157
2158        let opened = OciImage::open(&repo, &manifest_digest, Some(&manifest_verity)).unwrap();
2159        assert!(!opened.is_container_image());
2160        assert_eq!(opened.layer_descriptors().len(), 1);
2161        assert_eq!(
2162            opened.layer_descriptors()[0].media_type(),
2163            &MediaType::Other("text/spdx+json".to_string())
2164        );
2165
2166        let fd = opened.open_layer_fd(&repo, 0).unwrap();
2167        let mut recovered = vec![];
2168        File::from(fd).read_to_end(&mut recovered).unwrap();
2169        assert_eq!(recovered, sbom_data);
2170
2171        assert!(opened.open_layer_fd(&repo, 1).is_err());
2172
2173        let gc = repo.gc(&[]).unwrap();
2174        assert_eq!(gc.objects_removed, 0);
2175
2176        untag_image(&repo, "my-sbom:v1").unwrap();
2177        let gc = repo.gc(&[]).unwrap();
2178        assert!(gc.objects_removed > 0);
2179    }
2180
2181    /// Test that open_layer_fd rejects tar layers.
2182    #[test]
2183    fn test_open_layer_fd_rejects_tar() {
2184        let test_repo = TestRepo::<Sha256HashValue>::new();
2185        let repo = &test_repo.repo;
2186
2187        let (digest, verity, _) = create_test_image(repo, Some("myimage:v1"), "amd64");
2188        let img = OciImage::open(&repo, &digest, Some(&verity)).unwrap();
2189        assert!(img.is_container_image());
2190
2191        // Tar layer should be rejected
2192        let err = img.open_layer_fd(&repo, 0).unwrap_err();
2193        let msg = format!("{err}");
2194        assert!(msg.contains("does not support tar layers"), "got: {msg}");
2195    }
2196
2197    /// Test storing a non-tar layer as a splitstream with a single
2198    /// external reference, simulating how `ensure_layer` handles
2199    /// non-tar media types. The raw bytes go into objects/ and a
2200    /// tiny splitstream holds the reference for GC tracking.
2201    #[test]
2202    fn test_non_tar_layer_storage() {
2203        let test_repo = TestRepo::<Sha256HashValue>::new();
2204        let repo = &test_repo.repo;
2205
2206        let sbom_data = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
2207        let diff_id = hash_sha256(sbom_data);
2208
2209        let object_id = repo.ensure_object(sbom_data).unwrap();
2210
2211        let content_id = crate::layer_identifier(&diff_id);
2212        let mut stream = repo
2213            .create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE)
2214            .unwrap();
2215        stream.add_external_size(sbom_data.len() as u64);
2216        stream.write_reference(object_id.clone()).unwrap();
2217        let stream_verity = repo.write_stream(stream, &content_id, None).unwrap();
2218
2219        let found = repo.has_stream(&content_id).unwrap();
2220        assert!(found.is_some());
2221        assert_eq!(found.unwrap(), stream_verity);
2222
2223        let mut reader = repo
2224            .open_stream(
2225                &content_id,
2226                Some(&stream_verity),
2227                Some(crate::skopeo::OCI_BLOB_CONTENT_TYPE),
2228            )
2229            .unwrap();
2230        let mut refs = vec![];
2231        reader.get_object_refs(|id| refs.push(id.clone())).unwrap();
2232        assert_eq!(refs.len(), 1);
2233        assert_eq!(refs[0], object_id);
2234
2235        let mut recovered = vec![];
2236        File::from(repo.open_object(&object_id).unwrap())
2237            .read_to_end(&mut recovered)
2238            .unwrap();
2239        assert_eq!(recovered, sbom_data);
2240    }
2241
2242    /// Test that a non-tar artifact layer (stored as an external ref)
2243    /// is preserved by GC when referenced from a tagged manifest.
2244    #[test]
2245    fn test_non_tar_artifact_gc() {
2246        use containers_image_proxy::oci_spec::image::{DescriptorBuilder, ImageManifestBuilder};
2247
2248        let test_repo = TestRepo::<Sha256HashValue>::new();
2249        let repo = &test_repo.repo;
2250
2251        let sbom_data = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
2252        let diff_id = hash_sha256(sbom_data);
2253        let blob_object_id = repo.ensure_object(sbom_data).unwrap();
2254
2255        let layer_content_id = crate::layer_identifier(&diff_id);
2256        let mut layer_stream = repo
2257            .create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE)
2258            .unwrap();
2259        layer_stream.add_external_size(sbom_data.len() as u64);
2260        layer_stream
2261            .write_reference(blob_object_id.clone())
2262            .unwrap();
2263        let layer_verity = repo
2264            .write_stream(layer_stream, &layer_content_id, None)
2265            .unwrap();
2266
2267        let config_bytes = b"{}";
2268        let config_digest = hash_sha256(config_bytes);
2269        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
2270        config_stream.write_external(config_bytes).unwrap();
2271        let config_verity = repo
2272            .write_stream(
2273                config_stream,
2274                &crate::config_identifier(&config_digest),
2275                None,
2276            )
2277            .unwrap();
2278
2279        let config_descriptor = DescriptorBuilder::default()
2280            .media_type(MediaType::ImageConfig)
2281            .digest(config_digest.clone())
2282            .size(config_bytes.len() as u64)
2283            .build()
2284            .unwrap();
2285        let layer_descriptor = DescriptorBuilder::default()
2286            .media_type(MediaType::Other("text/spdx+json".to_string()))
2287            .digest(diff_id.clone())
2288            .size(sbom_data.len() as u64)
2289            .build()
2290            .unwrap();
2291        let manifest = ImageManifestBuilder::default()
2292            .schema_version(2u32)
2293            .media_type(MediaType::ImageManifest)
2294            .config(config_descriptor)
2295            .layers(vec![layer_descriptor])
2296            .build()
2297            .unwrap();
2298
2299        let layer_verities = [(diff_id.clone(), layer_verity)];
2300
2301        let manifest_json = manifest.to_string().unwrap();
2302        let manifest_digest = hash_sha256(manifest_json.as_bytes());
2303
2304        let (_stored_digest, _manifest_verity) = write_manifest(
2305            &repo,
2306            &manifest,
2307            &manifest_digest,
2308            &config_verity,
2309            &layer_verities,
2310            Some("my-sbom:v1"),
2311        )
2312        .unwrap();
2313
2314        // GC should preserve everything — the blob object is reachable via
2315        // manifest → config named ref → layer splitstream → external ref
2316        let gc = repo.gc(&[]).unwrap();
2317        assert_eq!(gc.objects_removed, 0, "tagged artifact should be preserved");
2318
2319        let mut recovered = vec![];
2320        File::from(repo.open_object(&blob_object_id).unwrap())
2321            .read_to_end(&mut recovered)
2322            .unwrap();
2323        assert_eq!(recovered, sbom_data);
2324    }
2325
2326    /// Test storing and listing multiple container images.
2327    #[test]
2328    fn test_multiple_images() {
2329        let test_repo = TestRepo::<Sha256HashValue>::new();
2330        let repo = &test_repo.repo;
2331
2332        let (digest1, verity1, _) = create_test_image(repo, Some("app:v1"), "amd64");
2333        let (digest2, verity2, _) = create_test_image(repo, Some("app:v2"), "amd64");
2334        let (digest3, verity3, _) = create_test_image(repo, Some("other:latest"), "arm64");
2335
2336        let images = list_images(repo).unwrap();
2337        assert_eq!(images.len(), 3);
2338
2339        let names: Vec<_> = images.iter().map(|i| i.name.as_str()).collect();
2340        assert!(names.contains(&"app:v1"));
2341        assert!(names.contains(&"app:v2"));
2342        assert!(names.contains(&"other:latest"));
2343
2344        for img in &images {
2345            if img.name == "other:latest" {
2346                assert_eq!(img.architecture, "arm64");
2347            } else {
2348                assert_eq!(img.architecture, "amd64");
2349            }
2350            assert!(img.is_container);
2351        }
2352
2353        let img1 = OciImage::open_ref(repo, "app:v1").unwrap();
2354        assert_eq!(img1.manifest_digest(), &digest1);
2355        assert_eq!(img1.manifest_verity(), &verity1);
2356
2357        let img2 = OciImage::open_ref(repo, "app:v2").unwrap();
2358        assert_eq!(img2.manifest_digest(), &digest2);
2359        assert_eq!(img2.manifest_verity(), &verity2);
2360
2361        let img3 = OciImage::open_ref(repo, "other:latest").unwrap();
2362        assert_eq!(img3.manifest_digest(), &digest3);
2363        assert_eq!(img3.manifest_verity(), &verity3);
2364    }
2365
2366    /// Test that untagging removes the image from listing but preserves data.
2367    #[test]
2368    fn test_untag_image() {
2369        let test_repo = TestRepo::<Sha256HashValue>::new();
2370        let repo = &test_repo.repo;
2371
2372        let (digest1, verity1, _) = create_test_image(repo, Some("myapp:v1"), "amd64");
2373        let (digest2, _verity2, _) = create_test_image(repo, Some("myapp:v2"), "amd64");
2374
2375        let images = list_images(repo).unwrap();
2376        assert_eq!(images.len(), 2);
2377
2378        untag_image(repo, "myapp:v1").unwrap();
2379
2380        let images = list_images(repo).unwrap();
2381        assert_eq!(images.len(), 1);
2382        assert_eq!(images[0].name, "myapp:v2");
2383        assert_eq!(images[0].manifest_digest, digest2);
2384
2385        let img = OciImage::open(repo, &digest1, Some(&verity1)).unwrap();
2386        assert_eq!(img.manifest_digest(), &digest1);
2387
2388        let result = OciImage::open_ref(repo, "myapp:v1");
2389        assert!(result.is_err());
2390    }
2391
2392    /// Test resolving refs and listing refs.
2393    #[test]
2394    fn test_refs() {
2395        let test_repo = TestRepo::<Sha256HashValue>::new();
2396        let repo = &test_repo.repo;
2397
2398        let (digest, verity, _) = create_test_image(repo, Some("test:latest"), "amd64");
2399
2400        let refs = list_refs(repo).unwrap();
2401        assert_eq!(refs.len(), 1);
2402        assert_eq!(refs[0].0, "test:latest");
2403        assert_eq!(refs[0].1, digest);
2404
2405        let (resolved_digest, resolved_verity) = resolve_ref(repo, "test:latest").unwrap();
2406        assert_eq!(resolved_digest, digest);
2407        assert_eq!(resolved_verity, verity);
2408
2409        let result = resolve_ref::<Sha256HashValue>(repo, "nonexistent:tag");
2410        assert!(result.is_err());
2411    }
2412
2413    /// Test that tag_image rejects names containing `@`.
2414    #[test]
2415    fn test_tag_rejects_leading_at_sign() {
2416        let test_repo = TestRepo::<Sha256HashValue>::new();
2417        let repo = &test_repo.repo;
2418
2419        let (digest, _, _) = create_test_image(repo, Some("valid:v1"), "amd64");
2420
2421        // Leading @ is rejected
2422        let result = tag_image(repo, &digest, "@sha256:bad");
2423        assert!(result.is_err());
2424        let err = result.unwrap_err().to_string();
2425        assert!(err.contains("'@' is reserved"), "unexpected error: {err}");
2426
2427        // @ in the middle is fine
2428        let result = tag_image(repo, &digest, "name@digest");
2429        assert!(result.is_ok());
2430    }
2431
2432    /// Test that fsck catches refs starting with `@`.
2433    #[tokio::test]
2434    async fn test_oci_fsck_detects_invalid_ref_name() {
2435        let test_repo = TestRepo::<Sha256HashValue>::new();
2436        let repo = &test_repo.repo;
2437
2438        let (digest, _, _) = create_test_image(repo, Some("good:v1"), "amd64");
2439
2440        // Bypass validate_ref_name by creating the ref symlink directly
2441        let bad_name = "@badref";
2442        let ref_path = format!("streams/refs/{}", oci_ref_path(bad_name));
2443        let manifest_id = manifest_identifier(&digest);
2444        let target = format!("../../{manifest_id}");
2445        repo.symlink(&ref_path, &target)
2446            .expect("create bad ref symlink");
2447
2448        let result = oci_fsck(repo).await.unwrap();
2449        assert!(
2450            result.images_corrupted > 0,
2451            "fsck should report corruption for @ in ref name"
2452        );
2453        assert!(
2454            result
2455                .errors
2456                .iter()
2457                .any(|e| matches!(e, OciFsckError::InvalidRefName { name } if name == bad_name)),
2458            "fsck should report InvalidRefName error"
2459        );
2460        // The bad ref should be counted exactly once
2461        let invalid_count = result
2462            .errors
2463            .iter()
2464            .filter(|e| matches!(e, OciFsckError::InvalidRefName { .. }))
2465            .count();
2466        assert_eq!(invalid_count, 1, "should report exactly one InvalidRefName");
2467    }
2468
2469    /// Test that tagging an existing manifest with a new name works.
2470    #[test]
2471    fn test_tag_existing_manifest() {
2472        let test_repo = TestRepo::<Sha256HashValue>::new();
2473        let repo = &test_repo.repo;
2474
2475        let (digest, verity, _) = create_test_image(repo, Some("original:v1"), "amd64");
2476
2477        tag_image(repo, &digest, "alias:latest").unwrap();
2478
2479        let (d1, v1) = resolve_ref(repo, "original:v1").unwrap();
2480        let (d2, v2) = resolve_ref(repo, "alias:latest").unwrap();
2481        assert_eq!(d1, d2);
2482        assert_eq!(v1, v2);
2483        assert_eq!(d1, digest);
2484        assert_eq!(v1, verity);
2485
2486        let images = list_images(repo).unwrap();
2487        assert_eq!(images.len(), 2);
2488
2489        untag_image(repo, "original:v1").unwrap();
2490        let (d3, _) = resolve_ref(repo, "alias:latest").unwrap();
2491        assert_eq!(d3, digest);
2492
2493        let images = list_images(repo).unwrap();
2494        assert_eq!(images.len(), 1);
2495        assert_eq!(images[0].name, "alias:latest");
2496    }
2497
2498    /// Test opening image by manifest digest (no tag required).
2499    #[test]
2500    fn test_open_by_digest() {
2501        let test_repo = TestRepo::<Sha256HashValue>::new();
2502        let repo = &test_repo.repo;
2503
2504        let (digest, verity, config_digest) = create_test_image(repo, None, "amd64");
2505
2506        let images = list_images(repo).unwrap();
2507        assert!(images.is_empty());
2508
2509        let img = OciImage::open(repo, &digest, Some(&verity)).unwrap();
2510        assert_eq!(img.manifest_digest(), &digest);
2511        assert_eq!(img.config_digest(), &config_digest);
2512        assert!(img.is_container_image());
2513        assert_eq!(img.architecture(), "amd64");
2514
2515        let img2 = OciImage::open(repo, &digest, None).unwrap();
2516        assert_eq!(img2.manifest_digest(), &digest);
2517    }
2518
2519    /// Test fetching manifest and config from stored image.
2520    #[test]
2521    fn test_fetch_manifest_config() {
2522        let test_repo = TestRepo::<Sha256HashValue>::new();
2523        let repo = &test_repo.repo;
2524
2525        let (digest, verity, config_digest) =
2526            create_test_image(repo, Some("fetchtest:v1"), "amd64");
2527
2528        let img = OciImage::open_ref(repo, "fetchtest:v1").unwrap();
2529
2530        assert_eq!(img.manifest_digest(), &digest);
2531        assert_eq!(img.manifest_verity(), &verity);
2532        let manifest = img.manifest();
2533        assert_eq!(manifest.schema_version(), 2u32);
2534        assert_eq!(manifest.layers().len(), 1);
2535
2536        assert_eq!(img.config_digest(), &config_digest);
2537        let config = img.config().expect("should have config");
2538        assert_eq!(config.architecture().to_string(), "amd64");
2539        assert_eq!(config.os().to_string(), "linux");
2540        assert_eq!(config.rootfs().diff_ids().len(), 1);
2541
2542        let diff_ids = img.layer_diff_ids();
2543        assert_eq!(diff_ids.len(), 1);
2544        let layer_verity = img.layer_verity(diff_ids[0]);
2545        assert!(layer_verity.is_some());
2546    }
2547
2548    /// Test that has_manifest correctly detects existing manifests.
2549    #[test]
2550    fn test_has_manifest() {
2551        let test_repo = TestRepo::<Sha256HashValue>::new();
2552        let repo = &test_repo.repo;
2553
2554        let nonexistent: OciDigest =
2555            "sha256:0000000000000000000000000000000000000000000000000000000000000000"
2556                .parse()
2557                .unwrap();
2558        assert!(has_manifest(repo, &nonexistent).unwrap().is_none());
2559
2560        let (digest, verity, _) = create_test_image(repo, None, "amd64");
2561
2562        let found = has_manifest(repo, &digest).unwrap();
2563        assert!(found.is_some());
2564        assert_eq!(found.unwrap(), verity);
2565
2566        assert!(has_manifest(repo, &nonexistent).unwrap().is_none());
2567    }
2568
2569    /// Test empty repository behavior.
2570    #[test]
2571    fn test_empty_repo() {
2572        let test_repo = TestRepo::<Sha256HashValue>::new();
2573        let repo = &test_repo.repo;
2574
2575        // List should return empty vec, not error
2576        let images = list_images(repo).unwrap();
2577        assert!(images.is_empty());
2578
2579        let refs = list_refs(repo).unwrap();
2580        assert!(refs.is_empty());
2581    }
2582
2583    /// Test untagging non-existent tag.
2584    #[test]
2585    fn test_untag_nonexistent() {
2586        let test_repo = TestRepo::<Sha256HashValue>::new();
2587        let repo = &test_repo.repo;
2588
2589        let result = untag_image(repo, "nonexistent:tag");
2590        assert!(result.is_err());
2591    }
2592
2593    // ==================== GC Integration Tests ====================
2594    //
2595    // These tests verify that garbage collection correctly handles OCI images:
2596    // - Tagged images are preserved (tags act as GC roots)
2597    // - Untagged images can be collected
2598    // - Shared layers between images are handled correctly
2599
2600    /// Test that GC preserves a tagged OCI image and all its components.
2601    #[test]
2602    fn test_gc_preserves_tagged_oci_image() {
2603        let test_repo = TestRepo::<Sha256HashValue>::new();
2604        let repo = &test_repo.repo;
2605
2606        let (manifest_digest, manifest_verity, config_digest) =
2607            create_test_image(repo, Some("myapp:v1"), "amd64");
2608
2609        let gc_result = repo.gc(&[]).unwrap();
2610
2611        assert_eq!(gc_result.objects_removed, 0);
2612        assert_eq!(gc_result.streams_pruned, 0);
2613
2614        let img = OciImage::open_ref(repo, "myapp:v1").unwrap();
2615        assert_eq!(img.manifest_digest(), &manifest_digest);
2616        assert_eq!(img.manifest_verity(), &manifest_verity);
2617        assert_eq!(img.config_digest(), &config_digest);
2618
2619        let diff_ids = img.layer_diff_ids();
2620        assert_eq!(diff_ids.len(), 1);
2621        assert!(img.layer_verity(diff_ids[0]).is_some());
2622    }
2623
2624    /// Test that GC removes an untagged OCI image.
2625    #[test]
2626    fn test_gc_removes_untagged_oci_image() {
2627        let test_repo = TestRepo::<Sha256HashValue>::new();
2628        let repo = &test_repo.repo;
2629
2630        let (manifest_digest, manifest_verity, _config_digest) =
2631            create_test_image(repo, None, "amd64");
2632
2633        let img = OciImage::open(repo, &manifest_digest, Some(&manifest_verity)).unwrap();
2634        let diff_ids = img.layer_diff_ids();
2635        assert_eq!(diff_ids.len(), 1);
2636        drop(img);
2637
2638        let gc_result = repo.gc(&[]).unwrap();
2639
2640        assert!(gc_result.objects_removed > 0);
2641
2642        let result = has_manifest(repo, &manifest_digest);
2643        assert!(
2644            result.unwrap().is_none(),
2645            "manifest should be gone after GC"
2646        );
2647    }
2648
2649    /// Test that untagging an image makes it eligible for GC.
2650    #[test]
2651    fn test_gc_after_untag_removes_image() {
2652        let test_repo = TestRepo::<Sha256HashValue>::new();
2653        let repo = &test_repo.repo;
2654
2655        let (manifest_digest, manifest_verity, _) =
2656            create_test_image(repo, Some("temporary:v1"), "amd64");
2657
2658        let gc_result = repo.gc(&[]).unwrap();
2659        assert_eq!(gc_result.objects_removed, 0);
2660
2661        untag_image(repo, "temporary:v1").unwrap();
2662
2663        assert!(OciImage::open_ref(repo, "temporary:v1").is_err());
2664
2665        assert!(OciImage::open(repo, &manifest_digest, Some(&manifest_verity)).is_ok());
2666
2667        let gc_result = repo.gc(&[]).unwrap();
2668        assert!(gc_result.objects_removed > 0);
2669
2670        assert!(has_manifest(repo, &manifest_digest).unwrap().is_none());
2671    }
2672
2673    /// Test GC with two images sharing layers - removing one preserves shared layers.
2674    #[test]
2675    fn test_gc_with_shared_layers() {
2676        let test_repo = TestRepo::<Sha256HashValue>::new();
2677        let repo = &test_repo.repo;
2678
2679        let shared_layer_data = b"shared-base-layer-content";
2680        let shared_layer_digest = hash_sha256(shared_layer_data);
2681
2682        let mut shared_layer_stream = repo
2683            .create_stream(crate::skopeo::TAR_LAYER_CONTENT_TYPE)
2684            .unwrap();
2685        shared_layer_stream
2686            .write_external(shared_layer_data)
2687            .unwrap();
2688        let shared_layer_verity = repo
2689            .write_stream(
2690                shared_layer_stream,
2691                &crate::layer_identifier(&shared_layer_digest),
2692                None,
2693            )
2694            .unwrap();
2695
2696        // Helper to create an image using the shared layer
2697        let create_image_with_shared_layer = |repo: &Arc<Repository<Sha256HashValue>>,
2698                                              tag: Option<&str>,
2699                                              extra_data: &[u8]|
2700         -> (OciDigest, Sha256HashValue) {
2701            let rootfs = RootFsBuilder::default()
2702                .typ("layers")
2703                .diff_ids(vec![shared_layer_digest.to_string()])
2704                .build()
2705                .unwrap();
2706
2707            let cfg = ConfigBuilder::default().build().unwrap();
2708
2709            // Add unique data to make configs different
2710            let config = ImageConfigurationBuilder::default()
2711                .architecture("amd64")
2712                .os("linux")
2713                .rootfs(rootfs)
2714                .config(cfg)
2715                .created(String::from_utf8_lossy(extra_data).to_string())
2716                .build()
2717                .unwrap();
2718
2719            let config_json = config.to_string().unwrap();
2720            let config_digest = hash_sha256(config_json.as_bytes());
2721
2722            let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
2723            config_stream.add_named_stream_ref(shared_layer_digest.as_ref(), &shared_layer_verity);
2724            config_stream
2725                .write_external(config_json.as_bytes())
2726                .unwrap();
2727            let config_verity = repo
2728                .write_stream(
2729                    config_stream,
2730                    &crate::config_identifier(&config_digest),
2731                    None,
2732                )
2733                .unwrap();
2734
2735            let config_descriptor = DescriptorBuilder::default()
2736                .media_type(MediaType::ImageConfig)
2737                .digest(config_digest.clone())
2738                .size(config_json.len() as u64)
2739                .build()
2740                .unwrap();
2741
2742            let layer_descriptor = DescriptorBuilder::default()
2743                .media_type(MediaType::ImageLayerGzip)
2744                .digest(shared_layer_digest.clone())
2745                .size(shared_layer_data.len() as u64)
2746                .build()
2747                .unwrap();
2748
2749            let manifest = ImageManifestBuilder::default()
2750                .schema_version(2u32)
2751                .media_type(MediaType::ImageManifest)
2752                .config(config_descriptor)
2753                .layers(vec![layer_descriptor])
2754                .build()
2755                .unwrap();
2756
2757            let layer_verities = [(shared_layer_digest.clone(), shared_layer_verity.clone())];
2758
2759            let manifest_json = manifest.to_string().unwrap();
2760            let manifest_digest = hash_sha256(manifest_json.as_bytes());
2761
2762            let (_stored_digest, manifest_verity) = write_manifest(
2763                repo,
2764                &manifest,
2765                &manifest_digest,
2766                &config_verity,
2767                &layer_verities,
2768                tag,
2769            )
2770            .unwrap();
2771
2772            (manifest_digest, manifest_verity)
2773        };
2774
2775        let (digest1, verity1) = create_image_with_shared_layer(repo, Some("tagged:v1"), b"image1");
2776        let (digest2, _verity2) = create_image_with_shared_layer(repo, None, b"image2");
2777
2778        assert!(has_manifest(repo, &digest1).unwrap().is_some());
2779        assert!(has_manifest(repo, &digest2).unwrap().is_some());
2780
2781        let gc_result = repo.gc(&[]).unwrap();
2782
2783        assert!(gc_result.objects_removed > 0);
2784
2785        let img1 = OciImage::open(repo, &digest1, Some(&verity1)).unwrap();
2786        assert_eq!(img1.layer_diff_ids().len(), 1);
2787        assert!(img1.layer_verity(shared_layer_digest.as_ref()).is_some());
2788
2789        assert!(has_manifest(repo, &digest2).unwrap().is_none());
2790
2791        // Shared layer still exists because the tagged image references it
2792        assert!(
2793            repo.has_stream(&crate::layer_identifier(&shared_layer_digest))
2794                .unwrap()
2795                .is_some()
2796        );
2797    }
2798
2799    /// Test that multiple tags on the same manifest are handled correctly.
2800    #[test]
2801    fn test_gc_with_multiple_tags_same_manifest() {
2802        let test_repo = TestRepo::<Sha256HashValue>::new();
2803        let repo = &test_repo.repo;
2804
2805        // Create an image with one tag
2806        let (manifest_digest, manifest_verity, _) =
2807            create_test_image(repo, Some("original:v1"), "amd64");
2808
2809        tag_image(repo, &manifest_digest, "alias:latest").unwrap();
2810
2811        assert_eq!(list_images(repo).unwrap().len(), 2);
2812
2813        untag_image(repo, "original:v1").unwrap();
2814
2815        let gc_result = repo.gc(&[]).unwrap();
2816
2817        assert_eq!(gc_result.objects_removed, 0);
2818
2819        let img = OciImage::open_ref(repo, "alias:latest").unwrap();
2820        assert_eq!(img.manifest_digest(), &manifest_digest);
2821        assert_eq!(img.manifest_verity(), &manifest_verity);
2822
2823        let diff_ids = img.layer_diff_ids();
2824        assert!(img.layer_verity(diff_ids[0]).is_some());
2825
2826        untag_image(repo, "alias:latest").unwrap();
2827
2828        let gc_result = repo.gc(&[]).unwrap();
2829
2830        assert!(gc_result.objects_removed > 0);
2831        assert!(has_manifest(repo, &manifest_digest).unwrap().is_none());
2832    }
2833
2834    /// Test gc_dry_run with OCI images.
2835    #[test]
2836    fn test_gc_dry_run_oci_image() {
2837        let test_repo = TestRepo::<Sha256HashValue>::new();
2838        let repo = &test_repo.repo;
2839
2840        // Create one tagged and one untagged image with DIFFERENT architectures
2841        // to ensure they have unique layer content (create_test_image uses arch in layer data)
2842        let (tagged_digest, tagged_verity, _) = create_test_image(repo, Some("keep:v1"), "amd64");
2843        let (untagged_digest, _untagged_verity, _) = create_test_image(repo, None, "arm64");
2844
2845        assert!(has_manifest(repo, &tagged_digest).unwrap().is_some());
2846        assert!(has_manifest(repo, &untagged_digest).unwrap().is_some());
2847
2848        let dry_run_result = repo.gc_dry_run(&[]).unwrap();
2849        assert!(
2850            dry_run_result.objects_removed > 0,
2851            "dry-run should report objects to remove, got {:?}",
2852            dry_run_result
2853        );
2854
2855        // But nothing should actually be removed
2856        assert!(has_manifest(repo, &tagged_digest).unwrap().is_some());
2857        assert!(has_manifest(repo, &untagged_digest).unwrap().is_some());
2858
2859        let img = OciImage::open(repo, &tagged_digest, Some(&tagged_verity)).unwrap();
2860        assert!(img.layer_verity(img.layer_diff_ids()[0]).is_some());
2861
2862        let real_result = repo.gc(&[]).unwrap();
2863
2864        assert_eq!(real_result.objects_removed, dry_run_result.objects_removed);
2865
2866        assert!(has_manifest(repo, &untagged_digest).unwrap().is_none());
2867        assert!(has_manifest(repo, &tagged_digest).unwrap().is_some());
2868    }
2869
2870    /// Test referrer index: store an artifact, add a referrer entry,
2871    /// then discover it via list_referrers.
2872    #[test]
2873    fn test_referrer_index_roundtrip() {
2874        let test_repo = TestRepo::<Sha256HashValue>::new();
2875        let repo = &test_repo.repo;
2876
2877        let (subject_digest, _, _) = create_test_image(repo, Some("subject:v1"), "amd64");
2878
2879        let empty_config = b"{}";
2880        let config_digest = hash_sha256(empty_config);
2881        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
2882        config_stream.write_external(empty_config).unwrap();
2883        let config_verity = repo
2884            .write_stream(
2885                config_stream,
2886                &crate::config_identifier(&config_digest),
2887                None,
2888            )
2889            .unwrap();
2890
2891        let mut artifact_digests = Vec::new();
2892        for i in 0..2u8 {
2893            let blob_data = format!("artifact-blob-{i}").into_bytes();
2894            let (blob_digest, blob_verity) = write_blob(repo, &blob_data).unwrap();
2895
2896            let config_descriptor = DescriptorBuilder::default()
2897                .media_type(MediaType::EmptyJSON)
2898                .digest(config_digest.clone())
2899                .size(empty_config.len() as u64)
2900                .build()
2901                .unwrap();
2902
2903            let layer_descriptor = DescriptorBuilder::default()
2904                .media_type(MediaType::Other("application/octet-stream".to_string()))
2905                .digest(blob_digest.clone())
2906                .size(blob_data.len() as u64)
2907                .build()
2908                .unwrap();
2909
2910            let manifest = ImageManifestBuilder::default()
2911                .schema_version(2u32)
2912                .media_type(MediaType::ImageManifest)
2913                .config(config_descriptor)
2914                .layers(vec![layer_descriptor])
2915                .build()
2916                .unwrap();
2917
2918            let layer_verities = [(blob_digest, blob_verity)];
2919
2920            let manifest_json = manifest.to_string().unwrap();
2921            let manifest_digest = hash_sha256(manifest_json.as_bytes());
2922
2923            write_manifest(
2924                repo,
2925                &manifest,
2926                &manifest_digest,
2927                &config_verity,
2928                &layer_verities,
2929                None,
2930            )
2931            .unwrap();
2932
2933            add_referrer(repo, &subject_digest, &manifest_digest).unwrap();
2934            artifact_digests.push(manifest_digest);
2935        }
2936
2937        let referrers = list_referrers(repo, &subject_digest).unwrap();
2938        assert_eq!(referrers.len(), 2);
2939
2940        let found_digests: Vec<&OciDigest> = referrers.iter().map(|(d, _)| d).collect();
2941        for expected in &artifact_digests {
2942            assert!(
2943                found_digests.contains(&expected),
2944                "Missing artifact {expected} in referrers"
2945            );
2946        }
2947    }
2948
2949    /// Helper to create a minimal OCI artifact manifest in the repository.
2950    ///
2951    /// Returns (manifest_digest, manifest_verity).
2952    fn create_test_artifact(
2953        repo: &Arc<Repository<Sha256HashValue>>,
2954        blob_data: &[u8],
2955    ) -> (OciDigest, Sha256HashValue) {
2956        let (blob_digest, blob_verity) = write_blob(repo, blob_data).unwrap();
2957
2958        let empty_config = b"{}";
2959        let config_digest = hash_sha256(empty_config);
2960
2961        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
2962        config_stream.write_external(empty_config).unwrap();
2963        let config_verity = repo
2964            .write_stream(
2965                config_stream,
2966                &crate::config_identifier(&config_digest),
2967                None,
2968            )
2969            .unwrap();
2970
2971        let config_descriptor = DescriptorBuilder::default()
2972            .media_type(MediaType::EmptyJSON)
2973            .digest(config_digest.clone())
2974            .size(empty_config.len() as u64)
2975            .build()
2976            .unwrap();
2977
2978        let layer_descriptor = DescriptorBuilder::default()
2979            .media_type(MediaType::Other("application/octet-stream".to_string()))
2980            .digest(blob_digest.clone())
2981            .size(blob_data.len() as u64)
2982            .build()
2983            .unwrap();
2984
2985        let manifest = ImageManifestBuilder::default()
2986            .schema_version(2u32)
2987            .media_type(MediaType::ImageManifest)
2988            .config(config_descriptor)
2989            .layers(vec![layer_descriptor])
2990            .build()
2991            .unwrap();
2992
2993        let layer_verities = [(blob_digest, blob_verity)];
2994
2995        let manifest_json = manifest.to_string().unwrap();
2996        let manifest_digest = hash_sha256(manifest_json.as_bytes());
2997
2998        let (_stored_digest, manifest_verity) = write_manifest(
2999            repo,
3000            &manifest,
3001            &manifest_digest,
3002            &config_verity,
3003            &layer_verities,
3004            None,
3005        )
3006        .unwrap();
3007
3008        (manifest_digest, manifest_verity)
3009    }
3010
3011    /// Test that GC collects referrer artifacts when their subject is untagged.
3012    ///
3013    /// Referrer symlinks under `streams/refs/oci-referrers/` act as GC roots,
3014    /// so orphaned referrer entries must be cleaned up before GC to allow
3015    /// the artifact manifests and their objects to be collected.
3016    #[test]
3017    fn test_gc_cleans_referrer_artifacts() {
3018        let test_repo = TestRepo::<Sha256HashValue>::new();
3019        let repo = &test_repo.repo;
3020
3021        // 1. Create a subject image with a tag
3022        let (subject_digest, _subject_verity, _) =
3023            create_test_image(repo, Some("subject:v1"), "amd64");
3024
3025        // 2. Create an artifact referencing the subject
3026        let (artifact_digest, _artifact_verity) =
3027            create_test_artifact(repo, b"fake-signature-data");
3028
3029        // 3. Register the referrer relationship
3030        add_referrer(repo, &subject_digest, &artifact_digest).unwrap();
3031
3032        // 4. Verify the referrer is discoverable
3033        let referrers = list_referrers(repo, &subject_digest).unwrap();
3034        assert_eq!(referrers.len(), 1);
3035        assert_eq!(referrers[0].0, artifact_digest);
3036
3037        // Verify GC preserves everything while subject is tagged
3038        let gc = repo.gc(&[]).unwrap();
3039        assert_eq!(gc.objects_removed, 0, "nothing should be collected yet");
3040
3041        // Artifact should still be accessible
3042        assert!(
3043            has_manifest(repo, &artifact_digest).unwrap().is_some(),
3044            "artifact manifest should exist"
3045        );
3046
3047        // 5. Untag the subject image
3048        untag_image(repo, "subject:v1").unwrap();
3049
3050        // 6. First GC pass: collects the subject's objects and cleans up
3051        //    its broken stream symlink. The artifact survives because the
3052        //    referrer symlink still acts as a GC root.
3053        let gc1 = repo.gc(&[]).unwrap();
3054        assert!(gc1.objects_removed > 0, "should collect subject objects");
3055        assert!(
3056            has_manifest(repo, &subject_digest).unwrap().is_none(),
3057            "subject manifest should be gone after first GC"
3058        );
3059        // Artifact is still alive — rooted by referrer symlink
3060        assert!(
3061            has_manifest(repo, &artifact_digest).unwrap().is_some(),
3062            "artifact should survive first GC (referrer symlink roots it)"
3063        );
3064
3065        // 7. Clean up dangling referrers (subject no longer exists)
3066        let cleaned = cleanup_dangling_referrers(repo).unwrap();
3067        assert_eq!(cleaned, 1, "should remove 1 dangling referrer entry");
3068
3069        // 8. Second GC pass: now collects the artifact (no longer rooted)
3070        let gc2 = repo.gc(&[]).unwrap();
3071        assert!(gc2.objects_removed > 0, "should collect artifact objects");
3072
3073        // 9. Verify the artifact manifest is gone
3074        assert!(
3075            has_manifest(repo, &artifact_digest).unwrap().is_none(),
3076            "artifact manifest should be collected"
3077        );
3078
3079        // 10. Verify list_referrers returns empty
3080        let referrers = list_referrers(repo, &subject_digest).unwrap();
3081        assert!(referrers.is_empty(), "no referrers should remain after GC");
3082
3083        // Also verify the subject manifest is gone
3084        assert!(
3085            has_manifest(repo, &subject_digest).unwrap().is_none(),
3086            "subject manifest should be collected"
3087        );
3088    }
3089
3090    /// Test that cleanup_dangling_referrers preserves referrers for tagged subjects.
3091    #[test]
3092    fn test_cleanup_referrers_preserves_tagged_subjects() {
3093        let test_repo = TestRepo::<Sha256HashValue>::new();
3094        let repo = &test_repo.repo;
3095
3096        // Create a tagged subject
3097        let (subject_digest, _, _) = create_test_image(repo, Some("subject:v1"), "amd64");
3098
3099        // Create an artifact and register it as a referrer
3100        let (artifact_digest, _) = create_test_artifact(repo, b"sig-data");
3101        add_referrer(repo, &subject_digest, &artifact_digest).unwrap();
3102
3103        // Cleanup should not remove anything — subject is still tagged
3104        let cleaned = cleanup_dangling_referrers(repo).unwrap();
3105        assert_eq!(cleaned, 0, "should not remove referrers for tagged subject");
3106
3107        // Referrer should still be discoverable
3108        let referrers = list_referrers(repo, &subject_digest).unwrap();
3109        assert_eq!(referrers.len(), 1);
3110    }
3111
3112    /// Test that cleanup handles multiple subjects, only removing dangling ones.
3113    #[test]
3114    fn test_cleanup_referrers_mixed_subjects() {
3115        let test_repo = TestRepo::<Sha256HashValue>::new();
3116        let repo = &test_repo.repo;
3117
3118        // Create two subjects
3119        let (subject1_digest, _, _) = create_test_image(repo, Some("subject1:v1"), "amd64");
3120        let (subject2_digest, _, _) = create_test_image(repo, Some("subject2:v1"), "arm64");
3121
3122        // Create artifacts for both
3123        let (artifact1_digest, _) = create_test_artifact(repo, b"sig-for-subject1");
3124        let (artifact2_digest, _) = create_test_artifact(repo, b"sig-for-subject2");
3125
3126        add_referrer(repo, &subject1_digest, &artifact1_digest).unwrap();
3127        add_referrer(repo, &subject2_digest, &artifact2_digest).unwrap();
3128
3129        // Untag only subject1
3130        untag_image(repo, "subject1:v1").unwrap();
3131
3132        // First GC pass to actually remove subject1's manifest stream
3133        // (cleanup_dangling_referrers checks has_manifest, which checks the
3134        // stream symlink; GC removes the broken symlink after object deletion)
3135        repo.gc(&[]).unwrap();
3136
3137        // Now cleanup should only remove referrers for subject1
3138        let cleaned = cleanup_dangling_referrers(repo).unwrap();
3139        assert_eq!(cleaned, 1, "should remove 1 referrer for untagged subject");
3140
3141        // Run GC again to collect the now-unrooted artifact1
3142        let gc = repo.gc(&[]).unwrap();
3143        assert!(gc.objects_removed > 0);
3144
3145        // subject2's referrer should still exist
3146        let referrers2 = list_referrers(repo, &subject2_digest).unwrap();
3147        assert_eq!(referrers2.len(), 1);
3148        assert_eq!(referrers2[0].0, artifact2_digest);
3149
3150        // subject1's artifact should be gone
3151        assert!(has_manifest(repo, &artifact1_digest).unwrap().is_none());
3152        // subject2's artifact should still exist
3153        assert!(has_manifest(repo, &artifact2_digest).unwrap().is_some());
3154    }
3155
3156    /// Test that cleanup_dangling_referrers is a no-op on an empty repository.
3157    #[test]
3158    fn test_cleanup_referrers_empty_repo() {
3159        let test_repo = TestRepo::<Sha256HashValue>::new();
3160        let repo = &test_repo.repo;
3161
3162        let cleaned = cleanup_dangling_referrers(repo).unwrap();
3163        assert_eq!(cleaned, 0);
3164    }
3165
3166    /// Test removing a single referrer: add, remove, verify gone, and
3167    /// confirm that a second remove is idempotent (no error).
3168    #[test]
3169    fn test_remove_referrer() {
3170        let test_repo = TestRepo::<Sha256HashValue>::new();
3171        let repo = &test_repo.repo;
3172
3173        let (subject_digest, _, _) = create_test_image(repo, Some("subject:v1"), "amd64");
3174        let (artifact_digest, _) = create_test_artifact(repo, b"sig-remove-test");
3175
3176        add_referrer(repo, &subject_digest, &artifact_digest).unwrap();
3177        assert_eq!(list_referrers(repo, &subject_digest).unwrap().len(), 1);
3178
3179        // Remove the referrer
3180        remove_referrer(repo, &subject_digest, &artifact_digest).unwrap();
3181        assert!(list_referrers(repo, &subject_digest).unwrap().is_empty());
3182
3183        // Second remove is idempotent
3184        remove_referrer(repo, &subject_digest, &artifact_digest).unwrap();
3185    }
3186
3187    // ==================== Property Tests ====================
3188
3189    mod proptests {
3190        use super::*;
3191        use proptest::prelude::*;
3192
3193        proptest! {
3194            #[test]
3195            fn encode_decode_tag_roundtrip(s in "\\PC*") {
3196                prop_assert_eq!(decode_tag(&encode_tag(&s)), s);
3197            }
3198
3199            #[test]
3200            fn encode_tag_no_slashes(s in "\\PC*") {
3201                prop_assert!(!encode_tag(&s).contains('/'));
3202            }
3203
3204            #[test]
3205            fn hash_deterministic_and_prefixed(data in proptest::collection::vec(any::<u8>(), 0..4096)) {
3206                let h1 = hash_sha256(&data);
3207                let h2 = hash_sha256(&data);
3208                prop_assert_eq!(&h1, &h2);
3209                prop_assert!(AsRef::<str>::as_ref(&h1).starts_with("sha256:"));
3210            }
3211
3212            #[test]
3213            fn manifest_identifier_format(hex in "[0-9a-f]{64}") {
3214                let digest_str = format!("sha256:{hex}");
3215                let digest: OciDigest = digest_str.parse().unwrap();
3216                let id = manifest_identifier(&digest);
3217                prop_assert!(id.starts_with("oci-manifest-"));
3218                prop_assert!(id.ends_with(&digest_str));
3219            }
3220
3221            #[test]
3222            fn blob_identifier_format(hex in "[0-9a-f]{64}") {
3223                let digest_str = format!("sha256:{hex}");
3224                let digest: OciDigest = digest_str.parse().unwrap();
3225                let id = blob_identifier(&digest);
3226                prop_assert!(id.starts_with("oci-blob-"));
3227                prop_assert!(id.ends_with(&digest_str));
3228            }
3229
3230            #[test]
3231            fn write_read_blob_roundtrip(data in proptest::collection::vec(any::<u8>(), 1..4096)) {
3232                let test_repo = TestRepo::<Sha256HashValue>::new();
3233                let repo = &test_repo.repo;
3234
3235                let (digest, verity) = write_blob(repo, &data).unwrap();
3236                let read_back = open_blob(repo, &digest, Some(&verity)).unwrap();
3237                prop_assert_eq!(read_back, data);
3238            }
3239        }
3240    }
3241
3242    /// Test removing all referrers for a subject at once.
3243    #[test]
3244    fn test_remove_referrers_for_subject() {
3245        let test_repo = TestRepo::<Sha256HashValue>::new();
3246        let repo = &test_repo.repo;
3247
3248        let (subject_digest, _, _) = create_test_image(repo, Some("subject:v1"), "amd64");
3249        let (artifact1_digest, _) = create_test_artifact(repo, b"sig-bulk-1");
3250        let (artifact2_digest, _) = create_test_artifact(repo, b"sig-bulk-2");
3251
3252        add_referrer(repo, &subject_digest, &artifact1_digest).unwrap();
3253        add_referrer(repo, &subject_digest, &artifact2_digest).unwrap();
3254        assert_eq!(list_referrers(repo, &subject_digest).unwrap().len(), 2);
3255
3256        // Remove all referrers for this subject
3257        remove_referrers_for_subject(repo, &subject_digest).unwrap();
3258        assert!(list_referrers(repo, &subject_digest).unwrap().is_empty());
3259
3260        // Idempotent: calling again on an already-empty subject is fine
3261        remove_referrers_for_subject(repo, &subject_digest).unwrap();
3262    }
3263
3264    // ==================== OCI Fsck Tests ====================
3265
3266    #[tokio::test]
3267    async fn test_oci_fsck_healthy_image() {
3268        let test_repo = TestRepo::<Sha256HashValue>::new();
3269        let repo = &test_repo.repo;
3270
3271        create_test_image(repo, Some("healthy:v1"), "amd64");
3272
3273        let result = oci_fsck(repo).await.unwrap();
3274
3275        assert!(
3276            result.is_ok(),
3277            "oci_fsck should pass on healthy repo: {result}"
3278        );
3279        assert_eq!(result.images_checked, 1);
3280        assert_eq!(result.images_corrupted, 0);
3281        assert!(result.repo_result.is_ok());
3282        assert!(result.errors.is_empty());
3283    }
3284
3285    #[tokio::test]
3286    async fn test_oci_fsck_detects_corrupt_manifest() {
3287        let test_repo = TestRepo::<Sha256HashValue>::new();
3288        let repo = &test_repo.repo;
3289
3290        let (manifest_digest, manifest_verity, _) =
3291            create_test_image(repo, Some("corrupt:v1"), "amd64");
3292
3293        // The manifest is stored as an external object in a splitstream.
3294        // Find the object file that holds the manifest JSON and corrupt it.
3295        let manifest_id = manifest_identifier(&manifest_digest);
3296        let mut stream = repo
3297            .open_stream(&manifest_id, Some(&manifest_verity), None)
3298            .unwrap();
3299
3300        let mut object_refs: Vec<Sha256HashValue> = Vec::new();
3301        stream
3302            .get_object_refs(|id| object_refs.push(id.clone()))
3303            .unwrap();
3304        assert!(
3305            !object_refs.is_empty(),
3306            "manifest should have an external object ref"
3307        );
3308
3309        // Corrupt the first (manifest JSON) object on disk.
3310        // Objects may be immutable due to fs-verity, so delete and recreate.
3311        let obj = &object_refs[0];
3312        let hex = obj.to_hex();
3313        let (dir, file) = hex.split_at(2);
3314        let obj_path = test_repo.path().join(format!("objects/{dir}/{file}"));
3315        std::fs::remove_file(&obj_path).unwrap();
3316        std::fs::write(&obj_path, b"not valid manifest json").unwrap();
3317
3318        let result = oci_fsck(repo).await.unwrap();
3319
3320        // The underlying repo fsck should detect the corrupted object
3321        assert!(
3322            !result.is_ok(),
3323            "oci_fsck should fail with corrupted manifest object: {result}"
3324        );
3325        assert!(
3326            result.repo_result().objects_corrupted() > 0,
3327            "repo fsck should detect corrupted object"
3328        );
3329    }
3330
3331    #[tokio::test]
3332    async fn test_oci_fsck_detects_missing_layer() {
3333        let test_repo = TestRepo::<Sha256HashValue>::new();
3334        let repo = &test_repo.repo;
3335
3336        let (manifest_digest, manifest_verity, _) =
3337            create_test_image(repo, Some("missing-layer:v1"), "amd64");
3338
3339        // Open the image to find the layer diff_id
3340        let img = OciImage::open(repo, &manifest_digest, Some(&manifest_verity)).unwrap();
3341        let diff_ids = img.layer_diff_ids();
3342        assert_eq!(diff_ids.len(), 1);
3343
3344        // Find the layer stream and its backing splitstream object, then
3345        // delete the stream symlink so the layer appears missing.
3346        let diff_id_parsed: OciDigest = diff_ids[0].parse().unwrap();
3347        let layer_id = crate::layer_identifier(&diff_id_parsed);
3348        let stream_symlink = test_repo.path().join(format!("streams/{layer_id}"));
3349        std::fs::remove_file(&stream_symlink).unwrap();
3350
3351        let result = oci_fsck(repo).await.unwrap();
3352
3353        assert!(
3354            !result.is_ok(),
3355            "oci_fsck should detect missing layer: {result}"
3356        );
3357        assert!(
3358            result.images_corrupted > 0,
3359            "should report corrupted OCI image"
3360        );
3361        assert!(
3362            result
3363                .errors
3364                .iter()
3365                .any(|e| e.to_string().contains("layer-stream-missing")),
3366            "errors should mention missing layer stream: {:?}",
3367            result.errors
3368        );
3369    }
3370
3371    // ==================== Additional OCI Fsck Gap Tests ====================
3372
3373    #[tokio::test]
3374    async fn test_oci_fsck_detects_config_digest_mismatch() {
3375        // Exercises fsck_single_image config digest mismatch (line ~1109).
3376        // Corrupts the config JSON object so its sha256 hash no longer
3377        // matches the digest recorded in the manifest.
3378        let test_repo = TestRepo::<Sha256HashValue>::new();
3379        let repo = &test_repo.repo;
3380
3381        let (manifest_digest, manifest_verity, config_digest) =
3382            create_test_image(repo, Some("config-corrupt:v1"), "amd64");
3383
3384        // Open image to get config verity, then find and corrupt the config object
3385        let img = OciImage::open(repo, &manifest_digest, Some(&manifest_verity)).unwrap();
3386        let config_verity = img.config_verity.clone();
3387        drop(img);
3388
3389        let config_id = crate::config_identifier(&config_digest);
3390        let mut stream = repo
3391            .open_stream(&config_id, Some(&config_verity), None)
3392            .unwrap();
3393        let mut config_obj_refs: Vec<Sha256HashValue> = Vec::new();
3394        stream
3395            .get_object_refs(|id| config_obj_refs.push(id.clone()))
3396            .unwrap();
3397        assert!(!config_obj_refs.is_empty());
3398
3399        // Corrupt the config object — replace with valid JSON that has
3400        // a different hash
3401        let obj = &config_obj_refs[0];
3402        let hex = obj.to_hex();
3403        let (prefix, rest) = hex.split_at(2);
3404        let dir =
3405            cap_std::fs::Dir::open_ambient_dir(test_repo.path(), cap_std::ambient_authority())
3406                .unwrap();
3407        let obj_rel = format!("objects/{prefix}/{rest}");
3408        dir.remove_file(&obj_rel).unwrap();
3409        // Write valid JSON config but with modified content
3410        dir.write(
3411            &obj_rel,
3412            br#"{"architecture":"arm64","os":"linux","rootfs":{"type":"layers","diff_ids":[]}}"#,
3413        )
3414        .unwrap();
3415
3416        let result = oci_fsck(repo).await.unwrap();
3417
3418        // The repo-level fsck will flag the object digest mismatch,
3419        // which makes the overall result not ok.
3420        assert!(
3421            !result.is_ok(),
3422            "oci_fsck should detect config corruption: {result}"
3423        );
3424    }
3425
3426    #[tokio::test]
3427    async fn test_oci_fsck_detects_missing_config_named_ref() {
3428        // Exercises the "manifest missing config reference" branch (line ~1079).
3429        // Deletes the config named ref from the manifest splitstream by
3430        // rewriting the manifest splitstream without the config named ref.
3431        //
3432        // Approach: create a manifest splitstream that stores the manifest
3433        // JSON externally but has NO named ref for the config, then point
3434        // the oci ref to it.
3435        let test_repo = TestRepo::<Sha256HashValue>::new();
3436        let repo = &test_repo.repo;
3437
3438        // Build a valid manifest JSON
3439        let layer_data = b"fake-layer-data";
3440        let layer_digest = hash_sha256(layer_data);
3441
3442        let mut layer_stream = repo
3443            .create_stream(crate::skopeo::TAR_LAYER_CONTENT_TYPE)
3444            .unwrap();
3445        layer_stream.write_external(layer_data).unwrap();
3446        let layer_verity = repo
3447            .write_stream(layer_stream, &crate::layer_identifier(&layer_digest), None)
3448            .unwrap();
3449
3450        let rootfs = RootFsBuilder::default()
3451            .typ("layers")
3452            .diff_ids(vec![layer_digest.to_string()])
3453            .build()
3454            .unwrap();
3455        let cfg = ConfigBuilder::default().build().unwrap();
3456        let config = ImageConfigurationBuilder::default()
3457            .architecture("amd64")
3458            .os("linux")
3459            .rootfs(rootfs)
3460            .config(cfg)
3461            .build()
3462            .unwrap();
3463        let config_json = config.to_string().unwrap();
3464        let config_digest = hash_sha256(config_json.as_bytes());
3465
3466        // Store config normally
3467        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
3468        config_stream.add_named_stream_ref(layer_digest.as_ref(), &layer_verity);
3469        config_stream
3470            .write_external(config_json.as_bytes())
3471            .unwrap();
3472        let _config_verity = repo
3473            .write_stream(
3474                config_stream,
3475                &crate::config_identifier(&config_digest),
3476                None,
3477            )
3478            .unwrap();
3479
3480        let config_descriptor = DescriptorBuilder::default()
3481            .media_type(MediaType::ImageConfig)
3482            .digest(config_digest.clone())
3483            .size(config_json.len() as u64)
3484            .build()
3485            .unwrap();
3486        let layer_descriptor = DescriptorBuilder::default()
3487            .media_type(MediaType::ImageLayerGzip)
3488            .digest(layer_digest.clone())
3489            .size(layer_data.len() as u64)
3490            .build()
3491            .unwrap();
3492        let manifest = ImageManifestBuilder::default()
3493            .schema_version(2u32)
3494            .media_type(MediaType::ImageManifest)
3495            .config(config_descriptor)
3496            .layers(vec![layer_descriptor])
3497            .build()
3498            .unwrap();
3499
3500        let manifest_json = manifest.to_string().unwrap();
3501        let manifest_digest = hash_sha256(manifest_json.as_bytes());
3502
3503        // Store manifest WITHOUT config named ref — this is the bug we test
3504        let manifest_id = manifest_identifier(&manifest_digest);
3505        let mut manifest_stream = repo.create_stream(OCI_MANIFEST_CONTENT_TYPE).unwrap();
3506        // Deliberately omit: manifest_stream.add_named_stream_ref(...)
3507        manifest_stream
3508            .write_external(manifest_json.as_bytes())
3509            .unwrap();
3510        let _manifest_verity = repo
3511            .write_stream(manifest_stream, &manifest_id, None)
3512            .unwrap();
3513
3514        // Create the OCI ref pointing to this manifest
3515        let ref_path = oci_ref_path("no-config-ref:v1");
3516        let stream_path = format!("streams/{manifest_id}");
3517        repo.symlink(&format!("streams/refs/{ref_path}"), &stream_path)
3518            .unwrap();
3519
3520        let result = oci_fsck_image(repo, "no-config-ref:v1").await.unwrap();
3521
3522        assert!(
3523            !result.is_ok(),
3524            "oci_fsck should detect missing config ref: {result}"
3525        );
3526        assert!(
3527            result
3528                .errors
3529                .iter()
3530                .any(|e| e.to_string().contains("config-ref-missing")),
3531            "errors should mention missing config reference: {:?}",
3532            result.errors
3533        );
3534    }
3535
3536    #[tokio::test]
3537    async fn test_oci_fsck_healthy_artifact() {
3538        // Exercises the artifact validation path (line ~1183).
3539        // Creates a non-container artifact and verifies oci_fsck passes.
3540        let test_repo = TestRepo::<Sha256HashValue>::new();
3541        let repo = &test_repo.repo;
3542
3543        // Create an artifact with non-ImageConfig media type
3544        let blob_data = b"artifact-content-for-fsck-test";
3545        let (blob_digest, blob_verity) = write_blob(repo, blob_data).unwrap();
3546
3547        let empty_config = b"{}";
3548        let config_digest = hash_sha256(empty_config);
3549        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
3550        config_stream.write_external(empty_config).unwrap();
3551        let config_verity = repo
3552            .write_stream(
3553                config_stream,
3554                &crate::config_identifier(&config_digest),
3555                None,
3556            )
3557            .unwrap();
3558
3559        let config_descriptor = DescriptorBuilder::default()
3560            .media_type(MediaType::EmptyJSON) // NOT ImageConfig
3561            .digest(config_digest.clone())
3562            .size(empty_config.len() as u64)
3563            .build()
3564            .unwrap();
3565        let layer_descriptor = DescriptorBuilder::default()
3566            .media_type(MediaType::Other("application/octet-stream".to_string()))
3567            .digest(blob_digest.clone())
3568            .size(blob_data.len() as u64)
3569            .build()
3570            .unwrap();
3571        let manifest = ImageManifestBuilder::default()
3572            .schema_version(2u32)
3573            .media_type(MediaType::ImageManifest)
3574            .config(config_descriptor)
3575            .layers(vec![layer_descriptor])
3576            .build()
3577            .unwrap();
3578
3579        let layer_verities = [(blob_digest.to_string(), blob_verity)];
3580
3581        let manifest_json = manifest.to_string().unwrap();
3582        let manifest_digest = hash_sha256(manifest_json.as_bytes());
3583
3584        write_manifest(
3585            repo,
3586            &manifest,
3587            &manifest_digest,
3588            &config_verity,
3589            &layer_verities,
3590            Some("artifact-fsck:v1"),
3591        )
3592        .unwrap();
3593
3594        let result = oci_fsck(repo).await.unwrap();
3595        assert!(
3596            result.is_ok(),
3597            "oci_fsck should pass for healthy artifact: {result}"
3598        );
3599        assert_eq!(result.images_checked, 1);
3600        assert_eq!(result.images_corrupted, 0);
3601    }
3602
3603    #[tokio::test]
3604    async fn test_oci_fsck_detects_missing_artifact_layer_ref() {
3605        // Exercises the artifact "manifest missing layer reference" branch
3606        // (line ~1198). Creates an artifact where the manifest named refs
3607        // don't include the layer digest.
3608        let test_repo = TestRepo::<Sha256HashValue>::new();
3609        let repo = &test_repo.repo;
3610
3611        let blob_data = b"artifact-blob-missing-ref";
3612        let (blob_digest, _blob_verity) = write_blob(repo, blob_data).unwrap();
3613
3614        let empty_config = b"{}";
3615        let config_digest = hash_sha256(empty_config);
3616        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
3617        config_stream.write_external(empty_config).unwrap();
3618        let config_verity = repo
3619            .write_stream(
3620                config_stream,
3621                &crate::config_identifier(&config_digest),
3622                None,
3623            )
3624            .unwrap();
3625
3626        let config_descriptor = DescriptorBuilder::default()
3627            .media_type(MediaType::EmptyJSON)
3628            .digest(config_digest.clone())
3629            .size(empty_config.len() as u64)
3630            .build()
3631            .unwrap();
3632        let layer_descriptor = DescriptorBuilder::default()
3633            .media_type(MediaType::Other("application/wasm".to_string()))
3634            .digest(blob_digest.clone())
3635            .size(blob_data.len() as u64)
3636            .build()
3637            .unwrap();
3638        let manifest = ImageManifestBuilder::default()
3639            .schema_version(2u32)
3640            .media_type(MediaType::ImageManifest)
3641            .config(config_descriptor)
3642            .layers(vec![layer_descriptor])
3643            .build()
3644            .unwrap();
3645
3646        // Deliberately pass empty layer_verities — no layer refs in manifest
3647        let layer_verities: Vec<(String, Sha256HashValue)> = Vec::new();
3648
3649        let manifest_json = manifest.to_string().unwrap();
3650        let manifest_digest = hash_sha256(manifest_json.as_bytes());
3651
3652        write_manifest(
3653            repo,
3654            &manifest,
3655            &manifest_digest,
3656            &config_verity,
3657            &layer_verities,
3658            Some("artifact-no-layer-ref:v1"),
3659        )
3660        .unwrap();
3661
3662        let result = oci_fsck(repo).await.unwrap();
3663
3664        assert!(
3665            !result.is_ok(),
3666            "oci_fsck should detect missing artifact layer ref: {result}"
3667        );
3668        assert!(
3669            result
3670                .errors
3671                .iter()
3672                .any(|e| e.to_string().contains("artifact-layer-ref-missing")),
3673            "errors should mention missing layer reference: {:?}",
3674            result.errors
3675        );
3676    }
3677
3678    #[tokio::test]
3679    async fn test_oci_fsck_image_unresolvable_ref() {
3680        // Exercises oci_fsck_image with an unresolvable ref (line ~1011).
3681        let test_repo = TestRepo::<Sha256HashValue>::new();
3682        let repo = &test_repo.repo;
3683
3684        let result = oci_fsck_image(repo, "nonexistent:tag").await.unwrap();
3685
3686        assert!(!result.is_ok(), "should fail for nonexistent ref");
3687        assert_eq!(result.images_checked, 1);
3688        assert_eq!(result.images_corrupted, 1);
3689        assert!(
3690            result
3691                .errors
3692                .iter()
3693                .any(|e| e.to_string().contains("ref-resolve-failed")),
3694            "errors should mention cannot resolve ref: {:?}",
3695            result.errors
3696        );
3697    }
3698
3699    #[tokio::test]
3700    async fn test_oci_fsck_multiple_images_partial_corruption() {
3701        // Verifies that oci_fsck checks ALL images and correctly counts
3702        // corrupted vs healthy ones when there's a mix.
3703        let test_repo = TestRepo::<Sha256HashValue>::new();
3704        let repo = &test_repo.repo;
3705
3706        // Create two healthy images
3707        create_test_image(repo, Some("healthy1:v1"), "amd64");
3708        let (manifest_digest2, manifest_verity2, _) =
3709            create_test_image(repo, Some("corrupt1:v1"), "arm64");
3710
3711        // Corrupt the second image's layer
3712        let img = OciImage::open(repo, &manifest_digest2, Some(&manifest_verity2)).unwrap();
3713        let diff_ids = img.layer_diff_ids();
3714        let diff_id_parsed: OciDigest = diff_ids[0].parse().unwrap();
3715        let layer_id = crate::layer_identifier(&diff_id_parsed);
3716        let dir =
3717            cap_std::fs::Dir::open_ambient_dir(test_repo.path(), cap_std::ambient_authority())
3718                .unwrap();
3719        dir.remove_file(format!("streams/{layer_id}")).unwrap();
3720
3721        let result = oci_fsck(repo).await.unwrap();
3722
3723        assert!(!result.is_ok(), "should detect corruption: {result}");
3724        assert_eq!(result.images_checked, 2);
3725        assert_eq!(
3726            result.images_corrupted, 1,
3727            "only one image should be corrupt"
3728        );
3729    }
3730
3731    #[tokio::test]
3732    async fn test_oci_fsck_detects_missing_layer_named_ref_in_config() {
3733        // Exercises the "config missing layer reference" branch (line ~1134).
3734        // Creates a container image where the config splitstream is missing
3735        // the named ref for a layer diff_id.
3736        let test_repo = TestRepo::<Sha256HashValue>::new();
3737        let repo = &test_repo.repo;
3738
3739        let layer_data = b"layer-for-missing-ref-test";
3740        let layer_digest = hash_sha256(layer_data);
3741
3742        let mut layer_stream = repo
3743            .create_stream(crate::skopeo::TAR_LAYER_CONTENT_TYPE)
3744            .unwrap();
3745        layer_stream.write_external(layer_data).unwrap();
3746        let layer_verity = repo
3747            .write_stream(layer_stream, &crate::layer_identifier(&layer_digest), None)
3748            .unwrap();
3749
3750        let rootfs = RootFsBuilder::default()
3751            .typ("layers")
3752            .diff_ids(vec![layer_digest.to_string()])
3753            .build()
3754            .unwrap();
3755        let cfg = ConfigBuilder::default().build().unwrap();
3756        let config = ImageConfigurationBuilder::default()
3757            .architecture("amd64")
3758            .os("linux")
3759            .rootfs(rootfs)
3760            .config(cfg)
3761            .build()
3762            .unwrap();
3763        let config_json = config.to_string().unwrap();
3764        let config_digest = hash_sha256(config_json.as_bytes());
3765
3766        // Store config WITHOUT the layer named ref — this is the bug
3767        let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE).unwrap();
3768        // Deliberately omit: config_stream.add_named_stream_ref(&layer_digest, &layer_verity);
3769        config_stream
3770            .write_external(config_json.as_bytes())
3771            .unwrap();
3772        let config_verity = repo
3773            .write_stream(
3774                config_stream,
3775                &crate::config_identifier(&config_digest),
3776                None,
3777            )
3778            .unwrap();
3779
3780        let config_descriptor = DescriptorBuilder::default()
3781            .media_type(MediaType::ImageConfig)
3782            .digest(config_digest.clone())
3783            .size(config_json.len() as u64)
3784            .build()
3785            .unwrap();
3786        let layer_descriptor = DescriptorBuilder::default()
3787            .media_type(MediaType::ImageLayerGzip)
3788            .digest(layer_digest.clone())
3789            .size(layer_data.len() as u64)
3790            .build()
3791            .unwrap();
3792        let manifest = ImageManifestBuilder::default()
3793            .schema_version(2u32)
3794            .media_type(MediaType::ImageManifest)
3795            .config(config_descriptor)
3796            .layers(vec![layer_descriptor])
3797            .build()
3798            .unwrap();
3799
3800        let layer_verities = [(layer_digest.to_string(), layer_verity)];
3801        let manifest_json = manifest.to_string().unwrap();
3802        let manifest_digest = hash_sha256(manifest_json.as_bytes());
3803
3804        write_manifest(
3805            repo,
3806            &manifest,
3807            &manifest_digest,
3808            &config_verity,
3809            &layer_verities,
3810            Some("missing-layer-ref:v1"),
3811        )
3812        .unwrap();
3813
3814        let result = oci_fsck(repo).await.unwrap();
3815
3816        assert!(
3817            !result.is_ok(),
3818            "oci_fsck should detect missing layer ref in config: {result}"
3819        );
3820        assert!(
3821            result
3822                .errors
3823                .iter()
3824                .any(|e| e.to_string().contains("layer-ref-missing")),
3825            "errors should mention config missing layer reference: {:?}",
3826            result.errors
3827        );
3828    }
3829}