Skip to main content

bv_builder/
catalog.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use anyhow::Context;
5use serde::{Deserialize, Serialize};
6
7/// A record of one conda package's OCI layer blob on the registry.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct CatalogEntry {
10    /// Content-addressed digest of the compressed layer blob ("sha256:...").
11    pub digest: String,
12    /// How many distinct tool builds have included this package as a solo layer.
13    /// Higher count = higher priority for the solo slot when max_layers is tight.
14    pub count: u64,
15}
16
17/// Maps conda package builds to their reproducible OCI layer digest.
18///
19/// Key format: "name==version-build" (e.g. "openssl==3.3.0-h69704a7_0").
20///
21/// Because bv-builder produces bit-identical compressed layer blobs for the
22/// same package triple (SOURCE_DATE_EPOCH + sorted entries + zstd level 19),
23/// the digest here is not a cache hint — it is a stable identity. Any future
24/// build of the same package will produce the same bytes and therefore the
25/// same blob on the registry. Docker/OCI registries deduplicate by content
26/// digest, so two images that both contain "openssl==3.3.0" share exactly one
27/// copy of that layer on disk and on the wire.
28///
29/// The catalog is stored in the registry repo at `layers/catalog.json` and
30/// grows incrementally as new tools are published via `bv publish --spec`.
31/// It replaces the batch-computed `popularity.json` for user-side publishing:
32/// instead of requiring a full scan of all registry specs, each new tool
33/// greedily adds its solo layers to the catalog, and future builds inherit
34/// the benefit automatically.
35#[derive(Debug, Clone, Serialize, Deserialize, Default)]
36pub struct LayerCatalog {
37    pub version: u32,
38    pub entries: HashMap<String, CatalogEntry>,
39}
40
41impl LayerCatalog {
42    pub fn new() -> Self {
43        Self {
44            version: 1,
45            entries: HashMap::new(),
46        }
47    }
48
49    pub fn key(name: &str, version: &str, build: &str) -> String {
50        format!("{name}=={version}-{build}")
51    }
52
53    pub fn get(&self, name: &str, version: &str, build: &str) -> Option<&CatalogEntry> {
54        self.entries.get(&Self::key(name, version, build))
55    }
56
57    pub fn contains(&self, name: &str, version: &str, build: &str) -> bool {
58        self.entries.contains_key(&Self::key(name, version, build))
59    }
60
61    /// Record a solo layer for this package. If the entry already exists the
62    /// count is incremented and the digest is updated (same package triple
63    /// always produces the same digest, so the update is a no-op in practice).
64    pub fn record(&mut self, name: &str, version: &str, build: &str, digest: &str) {
65        let key = Self::key(name, version, build);
66        let entry = self.entries.entry(key).or_insert(CatalogEntry {
67            digest: digest.to_string(),
68            count: 0,
69        });
70        entry.digest = digest.to_string();
71        entry.count += 1;
72    }
73
74    pub fn load(path: &Path) -> anyhow::Result<Self> {
75        let s = std::fs::read_to_string(path)
76            .with_context(|| format!("read layer catalog '{}'", path.display()))?;
77        serde_json::from_str(&s)
78            .with_context(|| format!("parse layer catalog '{}'", path.display()))
79    }
80
81    pub fn save(&self, path: &Path) -> anyhow::Result<()> {
82        let json = serde_json::to_string_pretty(self)?;
83        std::fs::write(path, &json)
84            .with_context(|| format!("write layer catalog '{}'", path.display()))
85    }
86
87    pub fn to_json(&self) -> anyhow::Result<String> {
88        serde_json::to_string_pretty(self).context("serialize catalog")
89    }
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// The key is `name==version-build`.
    #[test]
    fn key_format() {
        let key = LayerCatalog::key("openssl", "3.3.0", "h69704a7_0");
        assert_eq!(key, "openssl==3.3.0-h69704a7_0");
    }

    /// Recording the same package twice yields a count of 2 and keeps the digest.
    #[test]
    fn record_increments_count() {
        let mut catalog = LayerCatalog::new();
        for _ in 0..2 {
            catalog.record("openssl", "3.3.0", "h0", "sha256:abc");
        }

        let recorded = catalog.get("openssl", "3.3.0", "h0").unwrap();
        assert_eq!(recorded.count, 2);
        assert_eq!(recorded.digest, "sha256:abc");
    }

    /// A catalog written with `save` can be read back with `load`.
    #[test]
    fn round_trips_json() {
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("catalog.json");

        let mut original = LayerCatalog::new();
        original.record("zlib", "1.2.11", "h0_0", "sha256:xyz");
        original.save(&file).unwrap();

        let reloaded = LayerCatalog::load(&file).unwrap();
        assert!(reloaded.contains("zlib", "1.2.11", "h0_0"));
    }
}
125}