1use std::io::Write;
2use std::path::Path;
3
4use anyhow::{Context, Result};
5use bv_core::lockfile::{CondaPackagePin, LayerDescriptor};
6use futures_util::StreamExt as _;
7use oci_client::{
8 Reference,
9 client::{Client, ClientConfig, ClientProtocol},
10 secrets::RegistryAuth,
11};
12use sha2::{Digest, Sha256};
13
14use crate::catalog::LayerCatalog;
15use crate::layering::{LayerGroup, PackingStrategy, pack};
16use crate::popularity::PopularityMap;
17use crate::spec::{ResolvedPackage, ResolvedSpec};
18
/// Modification time, in seconds since the Unix epoch, stamped on every tar
/// header written by `create_reproducible_layer`, so the archive bytes do not
/// depend on when the staged files were created.
const SOURCE_DATE_EPOCH: u64 = 0;
24
/// An assembled image held entirely in memory: its layers plus the serialized
/// image config. `manifest_json` derives the manifest from these fields.
pub struct OciImage {
    /// Image name; `build` copies it from the resolved spec.
    pub name: String,
    /// Image version; `build` copies it from the resolved spec.
    pub version: String,
    /// Layers in manifest order. `build` places the base image layers first,
    /// then the package layers, the metadata layer and the entrypoint layer.
    pub layers: Vec<OciLayer>,
    /// Serialized image config JSON; its SHA-256 and length become the
    /// manifest's config descriptor.
    pub config: Vec<u8>,
}
33
/// One image layer together with the metadata needed to reference it.
pub struct OciLayer {
    /// The layer blob. Layers built in this file hold a zstd-compressed tar;
    /// base layers hold whatever bytes the registry returned.
    pub compressed: Vec<u8>,
    /// Digest, size and media type written into the manifest for this layer.
    pub descriptor: LayerDescriptor,
    /// Value listed under `rootfs.diff_ids` in the image config. For layers
    /// built here it is `sha256:` plus the hash of the uncompressed tar.
    pub uncompressed_digest: String,
}
40
41impl OciImage {
42 pub fn manifest_json(&self) -> Result<Vec<u8>> {
44 let config_digest = sha256_hex(&self.config);
45 let config_size = self.config.len() as u64;
46
47 let mut layers_json = String::from("[\n");
48 for (i, layer) in self.layers.iter().enumerate() {
49 let comma = if i + 1 == self.layers.len() { "" } else { "," };
50 layers_json.push_str(&format!(
51 " {{\"mediaType\":\"{}\",\"digest\":\"{}\",\"size\":{}}}{}\n",
52 layer.descriptor.media_type, layer.descriptor.digest, layer.descriptor.size, comma,
53 ));
54 }
55 layers_json.push(']');
56
57 let manifest = format!(
58 r#"{{
59 "schemaVersion": 2,
60 "mediaType": "application/vnd.oci.image.manifest.v1+json",
61 "config": {{
62 "mediaType": "application/vnd.oci.image.config.v1+json",
63 "digest": "sha256:{config_digest}",
64 "size": {config_size}
65 }},
66 "layers": {layers_json}
67}}"#
68 );
69 Ok(manifest.into_bytes())
70 }
71}
72
73pub async fn build(
83 resolved: &ResolvedSpec,
84 strategy: &PackingStrategy,
85 popularity: Option<&PopularityMap>,
86 catalog: Option<&LayerCatalog>,
87) -> Result<OciImage> {
88 let groups = pack(&resolved.packages, strategy, popularity, catalog);
89
90 let http = reqwest::Client::builder()
91 .user_agent("bv-builder/0.1")
92 .timeout(std::time::Duration::from_secs(600))
93 .build()?;
94
95 let base_ref = resolved
97 .base
98 .as_deref()
99 .unwrap_or("docker.io/library/debian:12-slim");
100 let mut layers = fetch_base_layers(base_ref)
101 .await
102 .with_context(|| format!("fetch base image '{base_ref}'"))?;
103
104 let concurrency = std::thread::available_parallelism()
108 .map(|n| n.get())
109 .unwrap_or(1)
110 .min(8);
111 let mut pkg_layers: Vec<OciLayer> = futures_util::stream::iter(groups.iter())
112 .map(|g| build_group_layer(&http, g))
113 .buffered(concurrency)
114 .collect::<Vec<_>>()
115 .await
116 .into_iter()
117 .collect::<Result<Vec<Option<OciLayer>>>>()?
118 .into_iter()
119 .flatten()
120 .collect();
121 layers.append(&mut pkg_layers);
122
123 let meta_layer = build_meta_layer(resolved)?;
125 layers.push(meta_layer);
126
127 let entrypoint_layer = build_entrypoint_layer(resolved)?;
129 layers.push(entrypoint_layer);
130
131 let config = build_config(resolved, &layers)?;
132
133 Ok(OciImage {
134 name: resolved.name.clone(),
135 version: resolved.version.clone(),
136 layers,
137 config,
138 })
139}
140
141async fn fetch_base_layers(base_ref: &str) -> Result<Vec<OciLayer>> {
147 use futures_util::StreamExt;
148
149 let reference: Reference = base_ref
150 .parse()
151 .with_context(|| format!("parse base OCI reference '{base_ref}'"))?;
152
153 let oci_config = ClientConfig {
154 protocol: ClientProtocol::HttpsExcept(vec!["localhost".into(), "127.0.0.1".into()]),
155 ..Default::default()
156 };
157 let client = Client::new(oci_config);
158 let auth = if base_ref.contains("ghcr.io") {
159 if let Ok(token) = std::env::var("GITHUB_TOKEN") {
160 RegistryAuth::Basic("token".into(), token)
161 } else {
162 RegistryAuth::Anonymous
163 }
164 } else {
165 RegistryAuth::Anonymous
166 };
167
168 let (manifest, _digest, config_json) = client
169 .pull_manifest_and_config(&reference, &auth)
170 .await
171 .with_context(|| format!("pull manifest+config for '{base_ref}'"))?;
172
173 let base_config: serde_json::Value =
174 serde_json::from_str(&config_json).context("parse base image config")?;
175 let base_diff_ids = base_config["rootfs"]["diff_ids"]
176 .as_array()
177 .cloned()
178 .unwrap_or_default();
179
180 let mut result = Vec::new();
181 for (i, layer_desc) in manifest.layers.iter().enumerate() {
182 let digest = &layer_desc.digest;
183 let media_type = &layer_desc.media_type;
184 let size = layer_desc.size as u64;
185
186 let mut compressed = Vec::new();
187 let mut stream = client
188 .pull_blob_stream(&reference, layer_desc)
189 .await
190 .with_context(|| format!("pull base layer blob {digest}"))?;
191 while let Some(chunk) = stream.next().await {
192 compressed.extend_from_slice(&chunk?);
193 }
194
195 let uncompressed_digest = base_diff_ids
196 .get(i)
197 .and_then(|v| v.as_str())
198 .unwrap_or(digest)
199 .to_string();
200
201 result.push(OciLayer {
202 compressed,
203 uncompressed_digest,
204 descriptor: LayerDescriptor {
205 digest: digest.clone(),
206 size,
207 media_type: media_type.clone(),
208 conda_package: None,
209 },
210 });
211 }
212
213 Ok(result)
214}
215
216async fn build_group_layer(
223 client: &reqwest::Client,
224 group: &LayerGroup,
225) -> Result<Option<OciLayer>> {
226 let downloaded: Vec<(crate::spec::ResolvedPackage, Vec<u8>)> =
228 futures_util::future::try_join_all(
229 group
230 .packages
231 .iter()
232 .map(|pkg| download_package(client, pkg)),
233 )
234 .await?;
235
236 let conda_package = if group.packages.len() == 1 {
237 let pkg = &group.packages[0];
238 Some(CondaPackagePin {
239 name: pkg.name.clone(),
240 version: pkg.version.clone(),
241 build: pkg.build.clone(),
242 channel: pkg.channel.clone(),
243 sha256: pkg.sha256.clone(),
244 })
245 } else {
246 None
247 };
248
249 tokio::task::spawn_blocking(move || -> Result<Option<OciLayer>> {
251 let work_dir = tempfile::tempdir().context("create temp dir for layer build")?;
252 let prefix = work_dir.path().join("opt").join("conda");
253 std::fs::create_dir_all(&prefix).context("create conda prefix dir")?;
254
255 for (pkg, bytes) in &downloaded {
256 extract_package_bytes(pkg, bytes, &prefix)
257 .with_context(|| format!("extract {}", pkg.filename))?;
258 }
259
260 if !prefix_has_files(&prefix) {
264 return Ok(None);
265 }
266
267 let (compressed, uncompressed_digest) = create_reproducible_layer(work_dir.path())?;
268 let digest = format!("sha256:{}", sha256_hex(&compressed));
269 let size = compressed.len() as u64;
270
271 Ok(Some(OciLayer {
272 compressed,
273 uncompressed_digest: format!("sha256:{uncompressed_digest}"),
274 descriptor: LayerDescriptor {
275 digest,
276 size,
277 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
278 conda_package,
279 },
280 }))
281 })
282 .await
283 .context("layer build task panicked")?
284}
285
/// Report whether `dir` contains at least one regular file at any depth.
///
/// An unreadable or missing directory counts as empty, and entries whose
/// metadata cannot be read are skipped. `DirEntry::metadata` does not follow
/// symlinks, so a symlink is neither a file nor a directory here and never
/// counts.
fn prefix_has_files(dir: &Path) -> bool {
    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return false,
    };
    listing.flatten().any(|child| match child.metadata() {
        Ok(info) if info.is_file() => true,
        Ok(info) if info.is_dir() => prefix_has_files(&child.path()),
        _ => false,
    })
}
301
302async fn download_package(
304 client: &reqwest::Client,
305 pkg: &crate::spec::ResolvedPackage,
306) -> Result<(crate::spec::ResolvedPackage, Vec<u8>)> {
307 use futures_util::StreamExt;
308
309 let resp = client
310 .get(&pkg.url)
311 .send()
312 .await
313 .with_context(|| format!("download {}", pkg.url))?;
314
315 if !resp.status().is_success() {
316 anyhow::bail!("HTTP {} fetching {}", resp.status(), pkg.url);
317 }
318
319 let mut bytes = Vec::new();
320 let mut stream = resp.bytes_stream();
321 while let Some(chunk) = stream.next().await {
322 bytes.extend_from_slice(&chunk?);
323 }
324
325 if !pkg.sha256.is_empty() {
326 let actual = sha256_hex(&bytes);
327 if actual != pkg.sha256 {
328 anyhow::bail!(
329 "sha256 mismatch for {} ({}): expected {} got {}",
330 pkg.name,
331 pkg.filename,
332 pkg.sha256,
333 actual
334 );
335 }
336 }
337
338 Ok((pkg.clone(), bytes))
339}
340
341fn extract_package_bytes(
343 pkg: &crate::spec::ResolvedPackage,
344 bytes: &[u8],
345 dest: &Path,
346) -> Result<()> {
347 if pkg.filename.ends_with(".conda") {
348 extract_conda_archive(bytes, dest)
349 } else if pkg.filename.ends_with(".tar.bz2") {
350 extract_tar_bz2(bytes, dest)
351 } else {
352 Ok(())
353 }
354}
355
356fn extract_conda_archive(data: &[u8], dest: &Path) -> Result<()> {
357 use std::io::Read;
358 let cursor = std::io::Cursor::new(data);
359 let mut zip = zip::ZipArchive::new(cursor).context("open .conda zip")?;
360
361 for i in 0..zip.len() {
362 let mut entry = zip.by_index(i)?;
363 if entry.name().starts_with("pkg-") && entry.name().ends_with(".tar.zst") {
366 let mut zstd_bytes = Vec::new();
367 entry.read_to_end(&mut zstd_bytes)?;
368 let decompressed = zstd::decode_all(std::io::Cursor::new(zstd_bytes))
369 .context("decompress pkg- zstd")?;
370 extract_tar_bytes(&decompressed, dest)?;
371 }
372 }
373 Ok(())
374}
375
376fn extract_tar_bz2(data: &[u8], dest: &Path) -> Result<()> {
377 let decompressed = bzip2::read::BzDecoder::new(data);
378 let mut archive = tar::Archive::new(decompressed);
379 unpack_tar_into(&mut archive, dest)
380}
381
382fn extract_tar_bytes(data: &[u8], dest: &Path) -> Result<()> {
383 let mut archive = tar::Archive::new(std::io::Cursor::new(data));
384 unpack_tar_into(&mut archive, dest)
385}
386
387fn unpack_tar_into<R: std::io::Read>(archive: &mut tar::Archive<R>, dest: &Path) -> Result<()> {
398 for entry in archive.entries().context("read tar entries")? {
399 let mut entry = entry.context("read tar entry")?;
400
401 if entry.header().entry_type() == tar::EntryType::Symlink {
402 let entry_path = entry.path().context("read entry path")?;
403 let link_name = entry
404 .link_name()
405 .context("read symlink target")?
406 .context("missing symlink target")?;
407
408 let rel: std::path::PathBuf = entry_path
410 .components()
411 .filter(|c| matches!(c, std::path::Component::Normal(_)))
412 .collect();
413 let full_path = dest.join(&rel);
414
415 if let Some(parent) = full_path.parent() {
416 std::fs::create_dir_all(parent).ok();
417 }
418 let _ = std::fs::remove_file(&full_path);
419 #[cfg(unix)]
420 std::os::unix::fs::symlink(&*link_name, &full_path)
421 .with_context(|| format!("symlink {:?} -> {:?}", full_path, link_name))?;
422 continue;
423 }
424
425 if let Err(e) = entry.unpack_in(dest) {
426 if e.kind() == std::io::ErrorKind::NotADirectory {
427 continue;
428 }
429 return Err(e).context("unpack tar entry");
430 }
431 }
432 Ok(())
433}
434
435fn create_reproducible_layer(dir: &Path) -> Result<(Vec<u8>, String)> {
444 use std::fs;
445
446 let mut entries: Vec<std::path::PathBuf> = Vec::new();
447 collect_files(dir, &mut entries)?;
448 entries.sort();
449
450 let mut uncompressed: Vec<u8> = Vec::new();
451 {
452 let mut builder = tar::Builder::new(&mut uncompressed);
453 builder.follow_symlinks(false);
454
455 for entry_path in &entries {
456 let rel = entry_path.strip_prefix(dir).unwrap();
457 let meta = fs::symlink_metadata(entry_path)?;
458
459 let mut header = tar::Header::new_ustar();
460 header.set_metadata(&meta);
461 header.set_mtime(SOURCE_DATE_EPOCH);
462 header.set_uid(0);
463 header.set_gid(0);
464 header.set_username("")?;
465 header.set_groupname("")?;
466
467 if meta.file_type().is_symlink() {
468 let target = fs::read_link(entry_path)?;
469 header.set_size(0);
470 header.set_entry_type(tar::EntryType::Symlink);
471 header.set_path(rel)?;
472 header.set_link_name(&target)?;
473 header.set_cksum();
474 builder.append(&header, std::io::empty())?;
475 } else if meta.is_file() {
476 let data = fs::read(entry_path)?;
477 header.set_size(data.len() as u64);
478 header.set_cksum();
479 builder.append_data(&mut header, rel, data.as_slice())?;
480 } else if meta.is_dir() {
481 header.set_size(0);
482 header.set_cksum();
483 builder.append_data(&mut header, rel, std::io::empty())?;
484 }
485 }
486 builder.finish()?;
487 }
488
489 let uncompressed_digest = sha256_hex(&uncompressed);
490
491 let compressed =
493 zstd::encode_all(std::io::Cursor::new(&uncompressed), 19).context("zstd compress layer")?;
494
495 Ok((compressed, uncompressed_digest))
496}
497
/// Append the path of every entry below `dir` to `out`, depth first.
///
/// A directory is pushed before its contents. Symlinks are pushed but never
/// descended into, even when they point at a directory. `dir` itself is not
/// pushed. Order within a directory is whatever `read_dir` yields.
fn collect_files(dir: &Path, out: &mut Vec<std::path::PathBuf>) -> Result<()> {
    for item in std::fs::read_dir(dir)? {
        let child = item?.path();
        // `symlink_metadata` describes the link itself, so a link to a
        // directory is reported as a symlink and not as a directory.
        let kind = std::fs::symlink_metadata(&child)?.file_type();
        let descend = !kind.is_symlink() && kind.is_dir();
        if descend {
            out.push(child.clone());
            collect_files(&child, out)?;
        } else {
            out.push(child);
        }
    }
    Ok(())
}
514
515fn build_meta_layer(resolved: &ResolvedSpec) -> Result<OciLayer> {
517 let work_dir = tempfile::tempdir().context("create temp dir for meta layer")?;
518 let conda_meta = work_dir.path().join("opt").join("conda").join("conda-meta");
519 std::fs::create_dir_all(&conda_meta)?;
520
521 for pkg in &resolved.packages {
522 let meta = serde_json::json!({
523 "name": pkg.name,
524 "version": pkg.version,
525 "build": pkg.build,
526 "channel": pkg.channel,
527 "url": pkg.url,
528 "sha256": pkg.sha256,
529 });
530 let filename = format!("{}-{}-{}.json", pkg.name, pkg.version, pkg.build);
531 let path = conda_meta.join(filename);
532 std::fs::write(&path, serde_json::to_string_pretty(&meta)?)?;
533 }
534
535 let (compressed, uncompressed_digest) = create_reproducible_layer(work_dir.path())?;
536 let digest = format!("sha256:{}", sha256_hex(&compressed));
537 let size = compressed.len() as u64;
538
539 Ok(OciLayer {
540 compressed,
541 uncompressed_digest: format!("sha256:{uncompressed_digest}"),
542 descriptor: LayerDescriptor {
543 digest,
544 size,
545 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
546 conda_package: None,
547 },
548 })
549}
550
551fn build_entrypoint_layer(_resolved: &ResolvedSpec) -> Result<OciLayer> {
554 let work_dir = tempfile::tempdir().context("create temp dir for entrypoint layer")?;
555 let script_path = work_dir.path().join("bv-entrypoint.sh");
556 {
557 let mut f = std::fs::File::create(&script_path)?;
558 writeln!(f, "#!/bin/sh")?;
559 writeln!(f, "# Generated by bv-builder; do not edit")?;
560 writeln!(f, "exec \"$@\"")?;
561 }
562 #[cfg(unix)]
564 {
565 use std::os::unix::fs::PermissionsExt;
566 let mut perms = std::fs::metadata(&script_path)?.permissions();
567 perms.set_mode(0o755);
568 std::fs::set_permissions(&script_path, perms)?;
569 }
570
571 let (compressed, uncompressed_digest) = create_reproducible_layer(work_dir.path())?;
572 let digest = format!("sha256:{}", sha256_hex(&compressed));
573 let size = compressed.len() as u64;
574
575 Ok(OciLayer {
576 compressed,
577 uncompressed_digest: format!("sha256:{uncompressed_digest}"),
578 descriptor: LayerDescriptor {
579 digest,
580 size,
581 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
582 conda_package: None,
583 },
584 })
585}
586
587fn build_config(resolved: &ResolvedSpec, layers: &[OciLayer]) -> Result<Vec<u8>> {
589 let diff_ids: Vec<String> = layers
590 .iter()
591 .map(|l| l.uncompressed_digest.clone())
592 .collect();
593
594 let config = serde_json::json!({
595 "architecture": resolved.platform.to_string().split('/').nth(1).unwrap_or("amd64"),
596 "os": "linux",
597 "created": "1970-01-01T00:00:00Z",
598 "author": "bv-builder",
599 "config": {
600 "Env": [
601 "PATH=/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
602 "LD_LIBRARY_PATH=/opt/conda/lib",
603 ],
604 "Labels": {
605 "org.opencontainers.image.title": &resolved.name,
606 "org.opencontainers.image.version": &resolved.version,
607 }
608 },
609 "rootfs": {
610 "type": "layers",
611 "diff_ids": diff_ids,
612 },
613 "history": []
614 });
615
616 Ok(serde_json::to_vec_pretty(&config)?)
617}
618
619pub fn catalog_updates_from_image(image: &OciImage) -> Vec<(&str, &str, &str, &str)> {
625 image
626 .layers
627 .iter()
628 .filter_map(|layer| {
629 let pin = layer.descriptor.conda_package.as_ref()?;
630 Some((
631 pin.name.as_str(),
632 pin.version.as_str(),
633 pin.build.as_str(),
634 layer.descriptor.digest.as_str(),
635 ))
636 })
637 .collect()
638}
639
640pub fn catalog_coverage(packages: &[ResolvedPackage], catalog: &LayerCatalog) -> (usize, usize) {
643 let hits = packages
644 .iter()
645 .filter(|p| catalog.contains(&p.name, &p.version, &p.build))
646 .count();
647 (hits, packages.len() - hits)
648}
649
650pub fn sha256_hex(data: &[u8]) -> String {
651 let mut hasher = Sha256::new();
652 hasher.update(data);
653 hex::encode(hasher.finalize())
654}
655
#[cfg(test)]
mod tests {
    use super::*;

    /// `sha256_hex` must reproduce the known digest of the ASCII string "hello".
    #[test]
    fn sha256_hex_is_correct() {
        const HELLO_SHA256: &str =
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";
        assert_eq!(sha256_hex(b"hello"), HELLO_SHA256);
    }

    /// Packing the same directory twice must give identical bytes and digests.
    #[test]
    fn create_reproducible_layer_is_deterministic() {
        let scratch = tempfile::tempdir().unwrap();
        let sample = scratch.path().join("file.txt");
        std::fs::write(&sample, b"content").unwrap();

        let first = create_reproducible_layer(scratch.path()).unwrap();
        let second = create_reproducible_layer(scratch.path()).unwrap();

        assert_eq!(first.0, second.0, "compressed bytes differ between two runs");
        assert_eq!(first.1, second.1, "digests differ between two runs");
    }
}