Skip to main content

bv_builder/
build.rs

1use std::io::Write;
2use std::path::Path;
3
4use anyhow::{Context, Result};
5use bv_core::lockfile::{CondaPackagePin, LayerDescriptor};
6use sha2::{Digest, Sha256};
7
8use crate::layering::{pack, LayerGroup, PackingStrategy};
9use crate::popularity::PopularityMap;
10use crate::spec::ResolvedSpec;
11
/// Modification time, in seconds since the Unix epoch, stamped on every tar
/// entry written by this module.
///
/// The value is 0, i.e. 1970-01-01T00:00:00Z. Pinning mtimes to one constant
/// keeps them from varying between builds, so the same packages yield
/// bit-identical compressed layer blobs.
///
/// Reference: https://reproducible-builds.org/docs/source-date-epoch/
const SOURCE_DATE_EPOCH: u64 = 0;
17
18/// An in-memory OCI image ready to be pushed or saved.
19pub struct OciImage {
20    pub name: String,
21    pub version: String,
22    pub layers: Vec<OciLayer>,
23    /// OCI image config JSON bytes (sha256 needed for manifest).
24    pub config: Vec<u8>,
25}
26
27pub struct OciLayer {
28    pub compressed: Vec<u8>,
29    pub descriptor: LayerDescriptor,
30}
31
32impl OciImage {
33    /// Compute the OCI image manifest JSON (image manifest v2/OCI schema).
34    pub fn manifest_json(&self) -> Result<Vec<u8>> {
35        let config_digest = sha256_hex(&self.config);
36        let config_size = self.config.len() as u64;
37
38        let mut layers_json = String::from("[\n");
39        for (i, layer) in self.layers.iter().enumerate() {
40            let comma = if i + 1 == self.layers.len() { "" } else { "," };
41            layers_json.push_str(&format!(
42                "    {{\"mediaType\":\"{}\",\"digest\":\"{}\",\"size\":{}}}{}\n",
43                layer.descriptor.media_type,
44                layer.descriptor.digest,
45                layer.descriptor.size,
46                comma,
47            ));
48        }
49        layers_json.push(']');
50
51        let manifest = format!(
52            r#"{{
53  "schemaVersion": 2,
54  "mediaType": "application/vnd.oci.image.manifest.v1+json",
55  "config": {{
56    "mediaType": "application/vnd.oci.image.config.v1+json",
57    "digest": "sha256:{config_digest}",
58    "size": {config_size}
59  }},
60  "layers": {layers_json}
61}}"#
62        );
63        Ok(manifest.into_bytes())
64    }
65}
66
67/// Build an `OciImage` from a `ResolvedSpec`.
68///
69/// Each package in the spec becomes one OCI layer (or a group when packing
70/// is enabled). Reproducibility rules applied to every layer:
71/// - Tar format: PAX (most portable and reproducible)
72/// - All entry mtimes: SOURCE_DATE_EPOCH (0)
73/// - All uid/gid: 0
74/// - Entries sorted by path before tar creation
75/// - Compression: zstd level 19
76///
77/// Reference: https://reproducible-builds.org/docs/archives/
78pub async fn build(
79    resolved: &ResolvedSpec,
80    strategy: &PackingStrategy,
81    popularity: Option<&PopularityMap>,
82) -> Result<OciImage> {
83    let groups = pack(&resolved.packages, strategy, popularity);
84
85    let client = reqwest::Client::builder()
86        .user_agent("bv-builder/0.1")
87        .timeout(std::time::Duration::from_secs(300))
88        .build()?;
89
90    let mut layers: Vec<OciLayer> = Vec::new();
91
92    for group in &groups {
93        let layer = build_group_layer(&client, group).await?;
94        layers.push(layer);
95    }
96
97    // Meta layer: conda-meta JSON for all packages.
98    let meta_layer = build_meta_layer(resolved)?;
99    layers.push(meta_layer);
100
101    // Entrypoint layer.
102    let entrypoint_layer = build_entrypoint_layer(resolved)?;
103    layers.push(entrypoint_layer);
104
105    let config = build_config(resolved, &layers)?;
106
107    Ok(OciImage {
108        name: resolved.name.clone(),
109        version: resolved.version.clone(),
110        layers,
111        config,
112    })
113}
114
115/// Download and layer a single package group.
116async fn build_group_layer(client: &reqwest::Client, group: &LayerGroup) -> Result<OciLayer> {
117    let work_dir = tempfile::tempdir().context("create temp dir for layer build")?;
118
119    for pkg in &group.packages {
120        download_and_extract_package(client, pkg, work_dir.path()).await?;
121    }
122
123    let (compressed, uncompressed_digest) = create_reproducible_layer(work_dir.path())?;
124    let digest = format!("sha256:{}", sha256_hex(&compressed));
125    let size = compressed.len() as u64;
126
127    // For single-package groups, attach conda_package metadata.
128    let conda_package = if group.packages.len() == 1 {
129        let pkg = &group.packages[0];
130        Some(CondaPackagePin {
131            name: pkg.name.clone(),
132            version: pkg.version.clone(),
133            build: pkg.build.clone(),
134            channel: pkg.channel.clone(),
135            sha256: pkg.sha256.clone(),
136        })
137    } else {
138        None
139    };
140
141    let _ = uncompressed_digest;
142
143    Ok(OciLayer {
144        compressed,
145        descriptor: LayerDescriptor {
146            digest,
147            size,
148            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
149            conda_package,
150        },
151    })
152}
153
154/// Download a conda package and extract it into `dest_dir`.
155async fn download_and_extract_package(
156    client: &reqwest::Client,
157    pkg: &crate::spec::ResolvedPackage,
158    dest_dir: &Path,
159) -> Result<()> {
160    use futures_util::StreamExt;
161
162    let resp = client
163        .get(&pkg.url)
164        .send()
165        .await
166        .with_context(|| format!("download {}", pkg.url))?;
167
168    if !resp.status().is_success() {
169        anyhow::bail!("HTTP {} fetching {}", resp.status(), pkg.url);
170    }
171
172    let mut bytes = Vec::new();
173    let mut stream = resp.bytes_stream();
174    while let Some(chunk) = stream.next().await {
175        bytes.extend_from_slice(&chunk?);
176    }
177
178    // Verify sha256 if present.
179    if !pkg.sha256.is_empty() {
180        let actual = sha256_hex(&bytes);
181        if actual != pkg.sha256 {
182            anyhow::bail!(
183                "sha256 mismatch for {} ({}): expected {} got {}",
184                pkg.name,
185                pkg.filename,
186                pkg.sha256,
187                actual
188            );
189        }
190    }
191
192    // Extract .conda (zip) or .tar.bz2.
193    if pkg.filename.ends_with(".conda") {
194        extract_conda_archive(&bytes, dest_dir)
195            .with_context(|| format!("extract {}", pkg.filename))?;
196    } else if pkg.filename.ends_with(".tar.bz2") {
197        extract_tar_bz2(&bytes, dest_dir)
198            .with_context(|| format!("extract {}", pkg.filename))?;
199    }
200
201    Ok(())
202}
203
204fn extract_conda_archive(data: &[u8], dest: &Path) -> Result<()> {
205    use std::io::Read;
206    let cursor = std::io::Cursor::new(data);
207    let mut zip = zip::ZipArchive::new(cursor).context("open .conda zip")?;
208
209    for i in 0..zip.len() {
210        let mut entry = zip.by_index(i)?;
211        if entry.name().starts_with("pkg-") && entry.name().ends_with(".tar.zst") {
212            let mut zstd_bytes = Vec::new();
213            entry.read_to_end(&mut zstd_bytes)?;
214            let decompressed = zstd::decode_all(std::io::Cursor::new(zstd_bytes))
215                .context("decompress pkg- zstd")?;
216            extract_tar_bytes(&decompressed, dest)?;
217        } else if entry.name().starts_with("info-") && entry.name().ends_with(".tar.zst") {
218            let mut zstd_bytes = Vec::new();
219            entry.read_to_end(&mut zstd_bytes)?;
220            let decompressed = zstd::decode_all(std::io::Cursor::new(zstd_bytes))
221                .context("decompress info- zstd")?;
222            extract_tar_bytes(&decompressed, dest)?;
223        }
224    }
225    Ok(())
226}
227
228fn extract_tar_bz2(data: &[u8], dest: &Path) -> Result<()> {
229    let decompressed = bzip2::read::BzDecoder::new(data);
230    let mut archive = tar::Archive::new(decompressed);
231    archive.unpack(dest).context("unpack tar.bz2")?;
232    Ok(())
233}
234
235fn extract_tar_bytes(data: &[u8], dest: &Path) -> Result<()> {
236    let mut archive = tar::Archive::new(std::io::Cursor::new(data));
237    archive.unpack(dest).context("unpack tar")?;
238    Ok(())
239}
240
241/// Create a reproducible, sorted, zstd-compressed OCI layer tarball from `dir`.
242///
243/// Reproducibility rules (https://reproducible-builds.org/docs/archives/):
244/// - PAX tar format
245/// - All mtimes set to SOURCE_DATE_EPOCH
246/// - All uid/gid set to 0
247/// - Entries sorted by path
248/// - zstd level 19 compression
249fn create_reproducible_layer(dir: &Path) -> Result<(Vec<u8>, String)> {
250    use std::fs;
251
252    let mut entries: Vec<std::path::PathBuf> = Vec::new();
253    collect_files(dir, &mut entries)?;
254    entries.sort();
255
256    let mut uncompressed: Vec<u8> = Vec::new();
257    {
258        let mut builder = tar::Builder::new(&mut uncompressed);
259        builder.follow_symlinks(false);
260
261        for entry_path in &entries {
262            let rel = entry_path.strip_prefix(dir).unwrap();
263            let meta = fs::symlink_metadata(entry_path)?;
264
265            let mut header = tar::Header::new_ustar();
266            header.set_metadata(&meta);
267            header.set_mtime(SOURCE_DATE_EPOCH);
268            header.set_uid(0);
269            header.set_gid(0);
270            header.set_username("")?;
271            header.set_groupname("")?;
272
273            if meta.is_file() {
274                let data = fs::read(entry_path)?;
275                header.set_size(data.len() as u64);
276                header.set_cksum();
277                builder.append_data(&mut header, rel, data.as_slice())?;
278            } else if meta.is_dir() {
279                header.set_size(0);
280                header.set_cksum();
281                builder.append_data(&mut header, rel, std::io::empty())?;
282            }
283        }
284        builder.finish()?;
285    }
286
287    let uncompressed_digest = sha256_hex(&uncompressed);
288
289    // zstd level 19 for maximum compression density.
290    let compressed = zstd::encode_all(std::io::Cursor::new(&uncompressed), 19)
291        .context("zstd compress layer")?;
292
293    Ok((compressed, uncompressed_digest))
294}
295
/// Recursively append every entry below `dir` to `out`.
///
/// Directories are pushed before their contents. The directory test uses the
/// entry's own file type, which does not follow symbolic links: a symlink is
/// recorded as a single entry and never descended into, even when it points
/// at a directory. This prevents duplicated subtrees and traversal outside
/// `dir`.
///
/// The resulting order depends on the filesystem; callers that need a stable
/// order must sort `out`.
fn collect_files(dir: &Path, out: &mut Vec<std::path::PathBuf>) -> Result<()> {
    for entry in std::fs::read_dir(dir)? {
        let entry = entry?;
        // `DirEntry::file_type` reports the link itself, unlike `Path::is_dir`.
        let is_real_dir = entry.file_type()?.is_dir();
        let path = entry.path();
        if is_real_dir {
            out.push(path.clone());
            collect_files(&path, out)?;
        } else {
            out.push(path);
        }
    }
    Ok(())
}
309
310/// Build a thin layer containing `/conda-meta/<pkg>.json` for every package.
311fn build_meta_layer(resolved: &ResolvedSpec) -> Result<OciLayer> {
312    let work_dir = tempfile::tempdir().context("create temp dir for meta layer")?;
313    let conda_meta = work_dir.path().join("conda-meta");
314    std::fs::create_dir_all(&conda_meta)?;
315
316    for pkg in &resolved.packages {
317        let meta = serde_json::json!({
318            "name": pkg.name,
319            "version": pkg.version,
320            "build": pkg.build,
321            "channel": pkg.channel,
322            "url": pkg.url,
323            "sha256": pkg.sha256,
324        });
325        let filename = format!("{}-{}-{}.json", pkg.name, pkg.version, pkg.build);
326        let path = conda_meta.join(filename);
327        std::fs::write(&path, serde_json::to_string_pretty(&meta)?)?;
328    }
329
330    let (compressed, _) = create_reproducible_layer(work_dir.path())?;
331    let digest = format!("sha256:{}", sha256_hex(&compressed));
332    let size = compressed.len() as u64;
333
334    Ok(OciLayer {
335        compressed,
336        descriptor: LayerDescriptor {
337            digest,
338            size,
339            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
340            conda_package: None,
341        },
342    })
343}
344
345/// Build the entrypoint layer: a `/bv-entrypoint.sh` script that exec's the
346/// tool's declared command.
347fn build_entrypoint_layer(_resolved: &ResolvedSpec) -> Result<OciLayer> {
348    let work_dir = tempfile::tempdir().context("create temp dir for entrypoint layer")?;
349    let script_path = work_dir.path().join("bv-entrypoint.sh");
350    {
351        let mut f = std::fs::File::create(&script_path)?;
352        writeln!(f, "#!/bin/sh")?;
353        writeln!(f, "# Generated by bv-builder — do not edit")?;
354        writeln!(f, "exec \"$@\"")?;
355    }
356    // Make executable (755).
357    #[cfg(unix)]
358    {
359        use std::os::unix::fs::PermissionsExt;
360        let mut perms = std::fs::metadata(&script_path)?.permissions();
361        perms.set_mode(0o755);
362        std::fs::set_permissions(&script_path, perms)?;
363    }
364
365    let (compressed, _) = create_reproducible_layer(work_dir.path())?;
366    let digest = format!("sha256:{}", sha256_hex(&compressed));
367    let size = compressed.len() as u64;
368
369    Ok(OciLayer {
370        compressed,
371        descriptor: LayerDescriptor {
372            digest,
373            size,
374            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
375            conda_package: None,
376        },
377    })
378}
379
380/// Build the OCI image config JSON.
381fn build_config(resolved: &ResolvedSpec, layers: &[OciLayer]) -> Result<Vec<u8>> {
382    let diff_ids: Vec<String> = layers
383        .iter()
384        .map(|l| {
385            // DiffID is the sha256 of the *uncompressed* layer; we only have the
386            // compressed digest here, so we use that as a stand-in.
387            l.descriptor.digest.clone()
388        })
389        .collect();
390
391    let config = serde_json::json!({
392        "architecture": resolved.platform.to_string().split('/').nth(1).unwrap_or("amd64"),
393        "os": "linux",
394        "created": "1970-01-01T00:00:00Z",
395        "author": "bv-builder",
396        "config": {
397            "Labels": {
398                "org.opencontainers.image.title": &resolved.name,
399                "org.opencontainers.image.version": &resolved.version,
400            }
401        },
402        "rootfs": {
403            "type": "layers",
404            "diff_ids": diff_ids,
405        },
406        "history": []
407    });
408
409    Ok(serde_json::to_vec_pretty(&config)?)
410}
411
412pub fn sha256_hex(data: &[u8]) -> String {
413    let mut hasher = Sha256::new();
414    hasher.update(data);
415    hex::encode(hasher.finalize())
416}
417
#[cfg(test)]
mod tests {
    use super::*;

    /// The digest of a known input matches its published SHA-256 value.
    #[test]
    fn sha256_hex_is_correct() {
        let expected = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";
        assert_eq!(sha256_hex(b"hello"), expected);
    }

    /// Packing the same directory twice yields identical bytes and digests.
    #[test]
    fn create_reproducible_layer_is_deterministic() {
        let scratch = tempfile::tempdir().unwrap();
        std::fs::write(scratch.path().join("file.txt"), b"content").unwrap();

        let (first_blob, first_digest) = create_reproducible_layer(scratch.path()).unwrap();
        let (second_blob, second_digest) = create_reproducible_layer(scratch.path()).unwrap();

        assert_eq!(
            first_blob, second_blob,
            "compressed bytes differ between two runs"
        );
        assert_eq!(
            first_digest, second_digest,
            "digests differ between two runs"
        );
    }
}