1use std::io::Write;
2use std::path::Path;
3
4use anyhow::{Context, Result};
5use bv_core::lockfile::{CondaPackagePin, LayerDescriptor};
6use futures_util::StreamExt as _;
7use oci_client::{
8 Reference,
9 client::{Client, ClientConfig, ClientProtocol},
10 secrets::RegistryAuth,
11};
12use sha2::{Digest, Sha256};
13
14use crate::layering::{LayerGroup, PackingStrategy, pack};
15use crate::popularity::PopularityMap;
16use crate::spec::ResolvedSpec;
17
/// Fixed modification time (seconds since the Unix epoch) written into every
/// tar entry by `create_reproducible_layer`, so entry timestamps never vary
/// from one build to the next.
const SOURCE_DATE_EPOCH: u64 = 0;
23
/// An assembled OCI image held entirely in memory.
pub struct OciImage {
    /// Image name; `build` copies it from the resolved spec.
    pub name: String,
    /// Image version; `build` copies it from the resolved spec.
    pub version: String,
    /// Layers in the order they are listed by `manifest_json`.
    pub layers: Vec<OciLayer>,
    /// Serialized image config; `manifest_json` hashes and measures these
    /// bytes to produce the manifest's config descriptor.
    pub config: Vec<u8>,
}
32
/// A single image layer: the blob bytes plus the metadata describing them.
pub struct OciLayer {
    /// The layer blob bytes (zstd-compressed tar for layers built in this file;
    /// the bytes pulled from the registry for base-image layers).
    pub compressed: Vec<u8>,
    /// Digest, size, media type and optional conda pin recorded for the blob.
    pub descriptor: LayerDescriptor,
    /// Value `build_config` lists for this layer under `rootfs.diff_ids`.
    pub uncompressed_digest: String,
}
39
40impl OciImage {
41 pub fn manifest_json(&self) -> Result<Vec<u8>> {
43 let config_digest = sha256_hex(&self.config);
44 let config_size = self.config.len() as u64;
45
46 let mut layers_json = String::from("[\n");
47 for (i, layer) in self.layers.iter().enumerate() {
48 let comma = if i + 1 == self.layers.len() { "" } else { "," };
49 layers_json.push_str(&format!(
50 " {{\"mediaType\":\"{}\",\"digest\":\"{}\",\"size\":{}}}{}\n",
51 layer.descriptor.media_type, layer.descriptor.digest, layer.descriptor.size, comma,
52 ));
53 }
54 layers_json.push(']');
55
56 let manifest = format!(
57 r#"{{
58 "schemaVersion": 2,
59 "mediaType": "application/vnd.oci.image.manifest.v1+json",
60 "config": {{
61 "mediaType": "application/vnd.oci.image.config.v1+json",
62 "digest": "sha256:{config_digest}",
63 "size": {config_size}
64 }},
65 "layers": {layers_json}
66}}"#
67 );
68 Ok(manifest.into_bytes())
69 }
70}
71
72pub async fn build(
78 resolved: &ResolvedSpec,
79 strategy: &PackingStrategy,
80 popularity: Option<&PopularityMap>,
81) -> Result<OciImage> {
82 let groups = pack(&resolved.packages, strategy, popularity);
83
84 let http = reqwest::Client::builder()
85 .user_agent("bv-builder/0.1")
86 .timeout(std::time::Duration::from_secs(600))
87 .build()?;
88
89 let base_ref = resolved
91 .base
92 .as_deref()
93 .unwrap_or("docker.io/library/debian:12-slim");
94 let mut layers = fetch_base_layers(base_ref)
95 .await
96 .with_context(|| format!("fetch base image '{base_ref}'"))?;
97
98 let concurrency = std::thread::available_parallelism()
102 .map(|n| n.get())
103 .unwrap_or(1)
104 .min(8);
105 let mut pkg_layers: Vec<OciLayer> = futures_util::stream::iter(groups.iter())
106 .map(|g| build_group_layer(&http, g))
107 .buffered(concurrency)
108 .collect::<Vec<_>>()
109 .await
110 .into_iter()
111 .collect::<Result<Vec<Option<OciLayer>>>>()?
112 .into_iter()
113 .flatten()
114 .collect();
115 layers.append(&mut pkg_layers);
116
117 let meta_layer = build_meta_layer(resolved)?;
119 layers.push(meta_layer);
120
121 let entrypoint_layer = build_entrypoint_layer(resolved)?;
123 layers.push(entrypoint_layer);
124
125 let config = build_config(resolved, &layers)?;
126
127 Ok(OciImage {
128 name: resolved.name.clone(),
129 version: resolved.version.clone(),
130 layers,
131 config,
132 })
133}
134
135async fn fetch_base_layers(base_ref: &str) -> Result<Vec<OciLayer>> {
141 use futures_util::StreamExt;
142
143 let reference: Reference = base_ref
144 .parse()
145 .with_context(|| format!("parse base OCI reference '{base_ref}'"))?;
146
147 let oci_config = ClientConfig {
148 protocol: ClientProtocol::HttpsExcept(vec!["localhost".into(), "127.0.0.1".into()]),
149 ..Default::default()
150 };
151 let client = Client::new(oci_config);
152 let auth = if base_ref.contains("ghcr.io") {
153 if let Ok(token) = std::env::var("GITHUB_TOKEN") {
154 RegistryAuth::Basic("token".into(), token)
155 } else {
156 RegistryAuth::Anonymous
157 }
158 } else {
159 RegistryAuth::Anonymous
160 };
161
162 let (manifest, _digest, config_json) = client
163 .pull_manifest_and_config(&reference, &auth)
164 .await
165 .with_context(|| format!("pull manifest+config for '{base_ref}'"))?;
166
167 let base_config: serde_json::Value =
168 serde_json::from_str(&config_json).context("parse base image config")?;
169 let base_diff_ids = base_config["rootfs"]["diff_ids"]
170 .as_array()
171 .cloned()
172 .unwrap_or_default();
173
174 let mut result = Vec::new();
175 for (i, layer_desc) in manifest.layers.iter().enumerate() {
176 let digest = &layer_desc.digest;
177 let media_type = &layer_desc.media_type;
178 let size = layer_desc.size as u64;
179
180 let mut compressed = Vec::new();
181 let mut stream = client
182 .pull_blob_stream(&reference, layer_desc)
183 .await
184 .with_context(|| format!("pull base layer blob {digest}"))?;
185 while let Some(chunk) = stream.next().await {
186 compressed.extend_from_slice(&chunk?);
187 }
188
189 let uncompressed_digest = base_diff_ids
190 .get(i)
191 .and_then(|v| v.as_str())
192 .unwrap_or(digest)
193 .to_string();
194
195 result.push(OciLayer {
196 compressed,
197 uncompressed_digest,
198 descriptor: LayerDescriptor {
199 digest: digest.clone(),
200 size,
201 media_type: media_type.clone(),
202 conda_package: None,
203 },
204 });
205 }
206
207 Ok(result)
208}
209
210async fn build_group_layer(
217 client: &reqwest::Client,
218 group: &LayerGroup,
219) -> Result<Option<OciLayer>> {
220 let downloaded: Vec<(crate::spec::ResolvedPackage, Vec<u8>)> =
222 futures_util::future::try_join_all(
223 group
224 .packages
225 .iter()
226 .map(|pkg| download_package(client, pkg)),
227 )
228 .await?;
229
230 let conda_package = if group.packages.len() == 1 {
231 let pkg = &group.packages[0];
232 Some(CondaPackagePin {
233 name: pkg.name.clone(),
234 version: pkg.version.clone(),
235 build: pkg.build.clone(),
236 channel: pkg.channel.clone(),
237 sha256: pkg.sha256.clone(),
238 })
239 } else {
240 None
241 };
242
243 tokio::task::spawn_blocking(move || -> Result<Option<OciLayer>> {
245 let work_dir = tempfile::tempdir().context("create temp dir for layer build")?;
246 let prefix = work_dir.path().join("opt").join("conda");
247 std::fs::create_dir_all(&prefix).context("create conda prefix dir")?;
248
249 for (pkg, bytes) in &downloaded {
250 extract_package_bytes(pkg, bytes, &prefix)
251 .with_context(|| format!("extract {}", pkg.filename))?;
252 }
253
254 if !prefix_has_files(&prefix) {
258 return Ok(None);
259 }
260
261 let (compressed, uncompressed_digest) = create_reproducible_layer(work_dir.path())?;
262 let digest = format!("sha256:{}", sha256_hex(&compressed));
263 let size = compressed.len() as u64;
264
265 Ok(Some(OciLayer {
266 compressed,
267 uncompressed_digest: format!("sha256:{uncompressed_digest}"),
268 descriptor: LayerDescriptor {
269 digest,
270 size,
271 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
272 conda_package,
273 },
274 }))
275 })
276 .await
277 .context("layer build task panicked")?
278}
279
/// Reports whether `dir` contains at least one regular file at any depth.
///
/// Unreadable directories, unreadable entries and entries whose metadata
/// cannot be queried are treated as "no file here" rather than as errors.
/// Entries that are neither regular files nor directories do not count.
fn prefix_has_files(dir: &Path) -> bool {
    match std::fs::read_dir(dir) {
        Err(_) => false,
        Ok(entries) => entries.flatten().any(|entry| {
            entry
                .metadata()
                .map(|meta| meta.is_file() || (meta.is_dir() && prefix_has_files(&entry.path())))
                .unwrap_or(false)
        }),
    }
}
295
296async fn download_package(
298 client: &reqwest::Client,
299 pkg: &crate::spec::ResolvedPackage,
300) -> Result<(crate::spec::ResolvedPackage, Vec<u8>)> {
301 use futures_util::StreamExt;
302
303 let resp = client
304 .get(&pkg.url)
305 .send()
306 .await
307 .with_context(|| format!("download {}", pkg.url))?;
308
309 if !resp.status().is_success() {
310 anyhow::bail!("HTTP {} fetching {}", resp.status(), pkg.url);
311 }
312
313 let mut bytes = Vec::new();
314 let mut stream = resp.bytes_stream();
315 while let Some(chunk) = stream.next().await {
316 bytes.extend_from_slice(&chunk?);
317 }
318
319 if !pkg.sha256.is_empty() {
320 let actual = sha256_hex(&bytes);
321 if actual != pkg.sha256 {
322 anyhow::bail!(
323 "sha256 mismatch for {} ({}): expected {} got {}",
324 pkg.name,
325 pkg.filename,
326 pkg.sha256,
327 actual
328 );
329 }
330 }
331
332 Ok((pkg.clone(), bytes))
333}
334
335fn extract_package_bytes(
337 pkg: &crate::spec::ResolvedPackage,
338 bytes: &[u8],
339 dest: &Path,
340) -> Result<()> {
341 if pkg.filename.ends_with(".conda") {
342 extract_conda_archive(bytes, dest)
343 } else if pkg.filename.ends_with(".tar.bz2") {
344 extract_tar_bz2(bytes, dest)
345 } else {
346 Ok(())
347 }
348}
349
350fn extract_conda_archive(data: &[u8], dest: &Path) -> Result<()> {
351 use std::io::Read;
352 let cursor = std::io::Cursor::new(data);
353 let mut zip = zip::ZipArchive::new(cursor).context("open .conda zip")?;
354
355 for i in 0..zip.len() {
356 let mut entry = zip.by_index(i)?;
357 if entry.name().starts_with("pkg-") && entry.name().ends_with(".tar.zst") {
360 let mut zstd_bytes = Vec::new();
361 entry.read_to_end(&mut zstd_bytes)?;
362 let decompressed = zstd::decode_all(std::io::Cursor::new(zstd_bytes))
363 .context("decompress pkg- zstd")?;
364 extract_tar_bytes(&decompressed, dest)?;
365 }
366 }
367 Ok(())
368}
369
370fn extract_tar_bz2(data: &[u8], dest: &Path) -> Result<()> {
371 let decompressed = bzip2::read::BzDecoder::new(data);
372 let mut archive = tar::Archive::new(decompressed);
373 archive.unpack(dest).context("unpack tar.bz2")?;
374 Ok(())
375}
376
377fn extract_tar_bytes(data: &[u8], dest: &Path) -> Result<()> {
378 let mut archive = tar::Archive::new(std::io::Cursor::new(data));
379 archive.unpack(dest).context("unpack tar")?;
380 Ok(())
381}
382
383fn create_reproducible_layer(dir: &Path) -> Result<(Vec<u8>, String)> {
392 use std::fs;
393
394 let mut entries: Vec<std::path::PathBuf> = Vec::new();
395 collect_files(dir, &mut entries)?;
396 entries.sort();
397
398 let mut uncompressed: Vec<u8> = Vec::new();
399 {
400 let mut builder = tar::Builder::new(&mut uncompressed);
401 builder.follow_symlinks(false);
402
403 for entry_path in &entries {
404 let rel = entry_path.strip_prefix(dir).unwrap();
405 let meta = fs::symlink_metadata(entry_path)?;
406
407 let mut header = tar::Header::new_ustar();
408 header.set_metadata(&meta);
409 header.set_mtime(SOURCE_DATE_EPOCH);
410 header.set_uid(0);
411 header.set_gid(0);
412 header.set_username("")?;
413 header.set_groupname("")?;
414
415 if meta.file_type().is_symlink() {
416 let target = fs::read_link(entry_path)?;
417 header.set_size(0);
418 header.set_entry_type(tar::EntryType::Symlink);
419 header.set_path(rel)?;
420 header.set_link_name(&target)?;
421 header.set_cksum();
422 builder.append(&header, std::io::empty())?;
423 } else if meta.is_file() {
424 let data = fs::read(entry_path)?;
425 header.set_size(data.len() as u64);
426 header.set_cksum();
427 builder.append_data(&mut header, rel, data.as_slice())?;
428 } else if meta.is_dir() {
429 header.set_size(0);
430 header.set_cksum();
431 builder.append_data(&mut header, rel, std::io::empty())?;
432 }
433 }
434 builder.finish()?;
435 }
436
437 let uncompressed_digest = sha256_hex(&uncompressed);
438
439 let compressed =
441 zstd::encode_all(std::io::Cursor::new(&uncompressed), 19).context("zstd compress layer")?;
442
443 Ok((compressed, uncompressed_digest))
444}
445
/// Appends the path of every entry below `dir` to `out`, depth-first.
///
/// Each directory is pushed before its contents. Symlinks are pushed as-is
/// and never descended into. `dir` itself is not pushed, and the order within
/// a directory is whatever `read_dir` yields.
///
/// # Errors
///
/// Fails if a directory cannot be listed or an entry's metadata cannot be read.
fn collect_files(dir: &Path, out: &mut Vec<std::path::PathBuf>) -> Result<()> {
    for entry in std::fs::read_dir(dir)? {
        let path = entry?.path();

        // `symlink_metadata` does not follow links, so `is_dir()` is true only
        // for real directories, never for a symlink pointing at one.
        let is_real_dir = std::fs::symlink_metadata(&path)?.is_dir();

        if is_real_dir {
            out.push(path.clone());
            collect_files(&path, out)?;
        } else {
            out.push(path);
        }
    }
    Ok(())
}
462
463fn build_meta_layer(resolved: &ResolvedSpec) -> Result<OciLayer> {
465 let work_dir = tempfile::tempdir().context("create temp dir for meta layer")?;
466 let conda_meta = work_dir.path().join("opt").join("conda").join("conda-meta");
467 std::fs::create_dir_all(&conda_meta)?;
468
469 for pkg in &resolved.packages {
470 let meta = serde_json::json!({
471 "name": pkg.name,
472 "version": pkg.version,
473 "build": pkg.build,
474 "channel": pkg.channel,
475 "url": pkg.url,
476 "sha256": pkg.sha256,
477 });
478 let filename = format!("{}-{}-{}.json", pkg.name, pkg.version, pkg.build);
479 let path = conda_meta.join(filename);
480 std::fs::write(&path, serde_json::to_string_pretty(&meta)?)?;
481 }
482
483 let (compressed, uncompressed_digest) = create_reproducible_layer(work_dir.path())?;
484 let digest = format!("sha256:{}", sha256_hex(&compressed));
485 let size = compressed.len() as u64;
486
487 Ok(OciLayer {
488 compressed,
489 uncompressed_digest: format!("sha256:{uncompressed_digest}"),
490 descriptor: LayerDescriptor {
491 digest,
492 size,
493 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
494 conda_package: None,
495 },
496 })
497}
498
499fn build_entrypoint_layer(_resolved: &ResolvedSpec) -> Result<OciLayer> {
502 let work_dir = tempfile::tempdir().context("create temp dir for entrypoint layer")?;
503 let script_path = work_dir.path().join("bv-entrypoint.sh");
504 {
505 let mut f = std::fs::File::create(&script_path)?;
506 writeln!(f, "#!/bin/sh")?;
507 writeln!(f, "# Generated by bv-builder; do not edit")?;
508 writeln!(f, "exec \"$@\"")?;
509 }
510 #[cfg(unix)]
512 {
513 use std::os::unix::fs::PermissionsExt;
514 let mut perms = std::fs::metadata(&script_path)?.permissions();
515 perms.set_mode(0o755);
516 std::fs::set_permissions(&script_path, perms)?;
517 }
518
519 let (compressed, uncompressed_digest) = create_reproducible_layer(work_dir.path())?;
520 let digest = format!("sha256:{}", sha256_hex(&compressed));
521 let size = compressed.len() as u64;
522
523 Ok(OciLayer {
524 compressed,
525 uncompressed_digest: format!("sha256:{uncompressed_digest}"),
526 descriptor: LayerDescriptor {
527 digest,
528 size,
529 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
530 conda_package: None,
531 },
532 })
533}
534
535fn build_config(resolved: &ResolvedSpec, layers: &[OciLayer]) -> Result<Vec<u8>> {
537 let diff_ids: Vec<String> = layers
538 .iter()
539 .map(|l| l.uncompressed_digest.clone())
540 .collect();
541
542 let config = serde_json::json!({
543 "architecture": resolved.platform.to_string().split('/').nth(1).unwrap_or("amd64"),
544 "os": "linux",
545 "created": "1970-01-01T00:00:00Z",
546 "author": "bv-builder",
547 "config": {
548 "Env": [
549 "PATH=/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
550 "LD_LIBRARY_PATH=/opt/conda/lib",
551 ],
552 "Labels": {
553 "org.opencontainers.image.title": &resolved.name,
554 "org.opencontainers.image.version": &resolved.version,
555 }
556 },
557 "rootfs": {
558 "type": "layers",
559 "diff_ids": diff_ids,
560 },
561 "history": []
562 });
563
564 Ok(serde_json::to_vec_pretty(&config)?)
565}
566
567pub fn sha256_hex(data: &[u8]) -> String {
568 let mut hasher = Sha256::new();
569 hasher.update(data);
570 hex::encode(hasher.finalize())
571}
572
#[cfg(test)]
mod tests {
    use super::*;

    /// `sha256_hex` must reproduce the known SHA-256 digest of `"hello"`.
    #[test]
    fn sha256_hex_is_correct() {
        let expected = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";
        assert_eq!(sha256_hex(b"hello"), expected);
    }

    /// Archiving the same directory twice must give identical bytes and digest.
    #[test]
    fn create_reproducible_layer_is_deterministic() {
        let scratch = tempfile::tempdir().unwrap();
        std::fs::write(scratch.path().join("file.txt"), b"content").unwrap();

        let (first_bytes, first_digest) = create_reproducible_layer(scratch.path()).unwrap();
        let (second_bytes, second_digest) = create_reproducible_layer(scratch.path()).unwrap();

        assert_eq!(
            first_bytes, second_bytes,
            "compressed bytes differ between two runs"
        );
        assert_eq!(
            first_digest, second_digest,
            "digests differ between two runs"
        );
    }
}
595}