use std::io::Write;
#[cfg(unix)]
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use eyre::{Context, Result};
use flate2::Compression;
use flate2::write::GzEncoder;
use sha2::{Digest, Sha256};
use tar::{EntryType, Header};
use walkdir::WalkDir;
/// A finished layer: the gzip-compressed tar bytes plus the identifiers derived from them.
///
/// Values of this type are assembled by `finalize_layer`.
#[derive(Debug, Clone)]
pub struct LayerBlob {
/// `sha256:<hex>` digest computed over the gzip-compressed bytes in `bytes`.
pub digest: String,
/// `sha256:<hex>` digest computed over the uncompressed tar stream.
pub diff_id: String,
/// Length in bytes of the compressed payload (`bytes.len()`).
pub size: u64,
/// The gzip-compressed tar archive itself.
pub bytes: Vec<u8>,
}
/// Packs the contents of `src_dir` into a compressed layer.
///
/// Every entry found under `src_dir` is archived beneath `target_prefix`
/// (leading and trailing slashes on the prefix are ignored downstream).
///
/// # Errors
///
/// Returns an error when `src_dir` is not a directory, or when walking the
/// tree or writing the archive fails.
pub fn build_layer_from_dir(src_dir: &Path, target_prefix: &str) -> Result<LayerBlob> {
    if src_dir.is_dir() {
        let walked = collect_sorted_entries(src_dir)?;
        return build_layer_from_entries(&walked, target_prefix);
    }
    eyre::bail!("not a directory: {}", src_dir.display());
}
/// Builds a compressed layer from in-memory files.
///
/// Each tuple is `(path inside the archive, file contents, unix mode)`.
/// Entries are written in ascending path order, and a directory entry
/// (mode `0o755`) is emitted once for every ancestor directory before the
/// first file that needs it. All entries are owned by uid/gid 0 with mtime 0,
/// so identical input yields identical bytes.
///
/// # Errors
///
/// Returns an error if any entry cannot be written to the tar stream or if
/// compression fails.
pub fn build_layer_from_files(files: &[(String, Vec<u8>, u32)]) -> Result<LayerBlob> {
    // Sort borrowed tuples: ordering only needs the paths, so there is no
    // reason to copy every file's contents. The sort is stable, so entries
    // with equal paths keep their input order.
    let mut sorted: Vec<&(String, Vec<u8>, u32)> = files.iter().collect();
    sorted.sort_by(|a, b| a.0.cmp(&b.0));
    let mut tar_bytes = Vec::new();
    {
        let mut builder = tar::Builder::new(&mut tar_bytes);
        builder.mode(tar::HeaderMode::Deterministic);
        // Directories already written, so each ancestor appears only once.
        let mut emitted_dirs: std::collections::BTreeSet<String> = Default::default();
        for (path, contents, mode) in sorted {
            for dir in parent_dirs(path) {
                if emitted_dirs.insert(dir.clone()) {
                    let mut header = Header::new_gnu();
                    header.set_entry_type(EntryType::Directory);
                    header.set_mode(0o755);
                    header.set_uid(0);
                    header.set_gid(0);
                    header.set_size(0);
                    header.set_mtime(0);
                    header.set_cksum();
                    builder
                        .append_data(&mut header, format!("{dir}/"), std::io::empty())
                        .wrap_err_with(|| format!("writing dir entry {dir}"))?;
                }
            }
            let mut header = Header::new_gnu();
            header.set_entry_type(EntryType::Regular);
            header.set_mode(*mode);
            header.set_uid(0);
            header.set_gid(0);
            header.set_size(contents.len() as u64);
            header.set_mtime(0);
            header.set_cksum();
            builder
                .append_data(&mut header, path, contents.as_slice())
                .wrap_err_with(|| format!("writing file entry {path}"))?;
        }
        builder.finish()?;
    }
    finalize_layer(tar_bytes)
}
/// One filesystem entry discovered while walking a source directory.
#[derive(Debug, Clone)]
struct Entry {
/// Path relative to the walked source directory; used to form the path inside the archive.
rel: PathBuf,
/// Path as yielded by the directory walk; used to open regular files for reading.
abs: PathBuf,
/// Whether this is a directory, a file, or a symlink (with its target).
kind: EntryKind,
/// Mode recorded in the tar header (`collect_sorted_entries` assigns 0o755 to directories,
/// 0o777 to symlinks, and 0o755 or 0o644 to files depending on executability).
mode: u32,
/// Content length in bytes for files; 0 for directories and symlinks.
size: u64,
}
/// Classification of an [`Entry`], deciding which kind of tar record is written for it.
#[derive(Debug, Clone)]
enum EntryKind {
/// A directory; written as a directory record with no content.
Dir,
/// Anything that is neither a directory nor a symlink; its content is read from disk.
File,
/// A symbolic link carrying the link target to record (already passed through
/// `rebase_symlink_target` when produced by `collect_sorted_entries`).
Symlink(PathBuf),
}
fn collect_sorted_entries(src_dir: &Path) -> Result<Vec<Entry>> {
let canonical_src = std::fs::canonicalize(src_dir).unwrap_or_else(|_| src_dir.to_path_buf());
let mut entries: Vec<Entry> = Vec::new();
for entry in WalkDir::new(src_dir).sort_by_file_name() {
let entry = entry.wrap_err("walking source directory")?;
let abs = entry.path().to_path_buf();
let rel = abs.strip_prefix(src_dir).unwrap().to_path_buf();
if rel.as_os_str().is_empty() {
continue;
}
let file_type = entry.file_type();
let md = entry.path().symlink_metadata()?;
let (kind, mode, size) = if file_type.is_dir() {
(EntryKind::Dir, 0o755u32, 0u64)
} else if file_type.is_symlink() {
let raw_target = std::fs::read_link(entry.path())?;
let target = rebase_symlink_target(&raw_target, &abs, &canonical_src, src_dir);
(EntryKind::Symlink(target), 0o777u32, 0u64)
} else {
let is_exec = file_is_executable(entry.path(), &md);
let mode = if is_exec { 0o755 } else { 0o644 };
(EntryKind::File, mode, md.len())
};
entries.push(Entry {
rel,
abs,
kind,
mode,
size,
});
}
entries.sort_by(|a, b| a.rel.cmp(&b.rel));
Ok(entries)
}
fn build_layer_from_entries(entries: &[Entry], target_prefix: &str) -> Result<LayerBlob> {
let prefix = target_prefix.trim_matches('/');
let mut tar_bytes = Vec::new();
{
let mut builder = tar::Builder::new(&mut tar_bytes);
builder.mode(tar::HeaderMode::Deterministic);
builder.follow_symlinks(false);
let mut emitted_dirs: std::collections::BTreeSet<String> = Default::default();
for dir in prefix_parents(prefix) {
if emitted_dirs.insert(dir.clone()) {
emit_dir(&mut builder, &dir)?;
}
}
for e in entries {
let rel_str = e.rel.to_string_lossy().replace('\\', "/");
let path_in_tar = if prefix.is_empty() {
rel_str
} else {
format!("{prefix}/{rel_str}")
};
match &e.kind {
EntryKind::Dir => {
if emitted_dirs.insert(path_in_tar.clone()) {
emit_dir(&mut builder, &path_in_tar)?;
}
}
EntryKind::File => {
let mut header = Header::new_gnu();
header.set_entry_type(EntryType::Regular);
header.set_mode(e.mode);
header.set_uid(0);
header.set_gid(0);
header.set_size(e.size);
header.set_mtime(0);
header.set_cksum();
let f = std::fs::File::open(&e.abs)
.wrap_err_with(|| format!("opening {}", e.abs.display()))?;
builder
.append_data(&mut header, &path_in_tar, f)
.wrap_err_with(|| format!("writing {path_in_tar}"))?;
}
EntryKind::Symlink(target) => {
let mut header = Header::new_gnu();
header.set_entry_type(EntryType::Symlink);
header.set_mode(e.mode);
header.set_uid(0);
header.set_gid(0);
header.set_size(0);
header.set_mtime(0);
header
.set_link_name(target)
.wrap_err_with(|| format!("symlink target {}", target.display()))?;
header.set_cksum();
builder
.append_data(&mut header, &path_in_tar, std::io::empty())
.wrap_err_with(|| format!("writing symlink {path_in_tar}"))?;
}
}
}
builder.finish()?;
}
finalize_layer(tar_bytes)
}
/// Appends a single directory record for `path` to `builder`.
///
/// The record uses mode `0o755`, uid/gid 0, size 0 and mtime 0. The archived
/// name always ends in `/`; one is appended when `path` lacks it.
///
/// # Errors
///
/// Returns an error if the record cannot be written.
fn emit_dir<W: Write>(builder: &mut tar::Builder<W>, path: &str) -> Result<()> {
    let mut dir_header = Header::new_gnu();
    dir_header.set_entry_type(EntryType::Directory);
    dir_header.set_mode(0o755);
    dir_header.set_uid(0);
    dir_header.set_gid(0);
    dir_header.set_size(0);
    dir_header.set_mtime(0);
    dir_header.set_cksum();

    let mut tar_path = path.to_string();
    if !tar_path.ends_with('/') {
        tar_path.push('/');
    }

    builder
        .append_data(&mut dir_header, &tar_path, std::io::empty())
        .wrap_err_with(|| format!("writing dir {tar_path}"))
}
/// Expands a slash-separated prefix into the cumulative directory chain.
///
/// `"a/b/c"` yields `["a", "a/b", "a/b/c"]`. Empty segments (from leading,
/// trailing or doubled slashes) are ignored, so an empty prefix yields an
/// empty list.
fn prefix_parents(prefix: &str) -> Vec<String> {
    let mut acc = String::new();
    prefix
        .split('/')
        .filter(|segment| !segment.is_empty())
        .map(|segment| {
            if !acc.is_empty() {
                acc.push('/');
            }
            acc.push_str(segment);
            acc.clone()
        })
        .collect()
}
/// Lists every ancestor directory of a slash-separated file path, outermost
/// first.
///
/// `"a/b/c"` yields `["a", "a/b"]`; the final component is never included.
/// A path with no slash has no ancestors. Empty segments are skipped, so a
/// leading or doubled slash does not produce an empty directory name.
fn parent_dirs(path: &str) -> Vec<String> {
    // Everything before the last '/' is the directory portion.
    let dir_part = match path.rsplit_once('/') {
        Some((dirs, _leaf)) => dirs,
        None => return Vec::new(),
    };
    let mut ancestors = Vec::new();
    let mut acc = String::new();
    for segment in dir_part.split('/').filter(|s| !s.is_empty()) {
        if !acc.is_empty() {
            acc.push('/');
        }
        acc.push_str(segment);
        ancestors.push(acc.clone());
    }
    ancestors
}
/// Compresses a finished tar stream and computes the layer's identifiers.
///
/// `diff_id` is the SHA-256 of the uncompressed tar; `digest` is the SHA-256
/// of the gzip output. Bytes 4..10 of the gzip header are overwritten with
/// `00 00 00 00 00 ff` so the output does not depend on whatever the encoder
/// placed there.
///
/// # Errors
///
/// Returns an error if gzip compression fails.
fn finalize_layer(tar_bytes: Vec<u8>) -> Result<LayerBlob> {
    /// Formats the SHA-256 of `data` as `sha256:<lowercase hex>`.
    fn sha256_tag(data: &[u8]) -> String {
        let mut hasher = Sha256::new();
        hasher.update(data);
        format!("sha256:{}", hex_encode(&hasher.finalize()))
    }

    let diff_id = sha256_tag(&tar_bytes);

    let mut encoder = GzEncoder::new(Vec::new(), Compression::new(6));
    encoder.write_all(&tar_bytes)?;
    let mut gz_bytes = encoder.finish()?;

    // Pin header bytes 4..8 to zero, byte 8 to zero and byte 9 to 0xff.
    // The slice exists only when at least ten bytes were produced.
    if let Some(header_tail) = gz_bytes.get_mut(4..10) {
        header_tail.copy_from_slice(&[0, 0, 0, 0, 0, 0xff]);
    }

    let digest = sha256_tag(&gz_bytes);
    Ok(LayerBlob {
        digest,
        diff_id,
        size: gz_bytes.len() as u64,
        bytes: gz_bytes,
    })
}
/// Unix: a file counts as executable when any of the owner, group or other
/// execute bits is set in its mode. The path is not consulted.
#[cfg(unix)]
fn file_is_executable(_path: &Path, md: &std::fs::Metadata) -> bool {
    const ANY_EXEC_BIT: u32 = 0o111;
    (md.mode() & ANY_EXEC_BIT) != 0
}
/// Non-unix: there are no mode bits to inspect, so executability is guessed
/// from the file extension (compared case-insensitively). The metadata is not
/// consulted.
#[cfg(not(unix))]
fn file_is_executable(path: &Path, _md: &std::fs::Metadata) -> bool {
    const EXECUTABLE_EXTENSIONS: [&str; 5] = ["exe", "bat", "cmd", "ps1", "com"];
    path.extension()
        .and_then(|ext| ext.to_str())
        .map_or(false, |ext| {
            EXECUTABLE_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
/// Rewrites an absolute symlink target that points inside the source tree
/// into a path relative to the link's own directory.
///
/// * `raw` — the target exactly as read from the link.
/// * `link_abs_path` — location of the link, as produced by the walk of `src_dir`.
/// * `canonical_src` — canonicalised form of `src_dir`.
/// * `src_dir` — the walked root, as given by the caller.
///
/// Relative targets are returned untouched. An absolute target outside the
/// tree is also returned untouched, after logging a warning. Otherwise the
/// result is one `..` per directory between the link and the root, followed
/// by the target's path relative to the root. If that would be empty (a link
/// in the root pointing at the root itself) the result is `.`, because an
/// empty string is not a valid symlink target.
fn rebase_symlink_target(
    raw: &Path,
    link_abs_path: &Path,
    canonical_src: &Path,
    src_dir: &Path,
) -> PathBuf {
    if !raw.is_absolute() {
        return raw.to_path_buf();
    }
    // Best effort: a dangling target cannot be canonicalised, so compare it as written.
    let target_canon = std::fs::canonicalize(raw).unwrap_or_else(|_| raw.to_path_buf());
    let rel_target: PathBuf = if let Ok(r) = target_canon.strip_prefix(canonical_src) {
        r.to_path_buf()
    } else if let Ok(r) = raw.strip_prefix(src_dir) {
        r.to_path_buf()
    } else {
        warn!(
            "oci layer: symlink {} → {} has an absolute target outside the tool's install dir; \
             it will be dangling inside the container",
            link_abs_path.display(),
            raw.display()
        );
        return raw.to_path_buf();
    };
    let link_rel = link_abs_path.strip_prefix(src_dir).unwrap_or(Path::new(""));
    // Number of directories between the root and the link (the link's own name excluded).
    let depth = link_rel.components().count().saturating_sub(1);
    let mut out: PathBuf = std::iter::repeat("..").take(depth).collect();
    // Pushing an empty path would only add a trailing separator, so skip it.
    if !rel_target.as_os_str().is_empty() {
        out.push(rel_target);
    }
    // A link in the root that targets the root would otherwise get an empty target.
    if out.as_os_str().is_empty() {
        out.push(".");
    }
    out
}
/// Encodes `bytes` as a lowercase hexadecimal string, two digits per byte.
///
/// Digits are looked up in a table and pushed directly, so the only
/// allocation is the pre-sized output string.
pub(crate) fn hex_encode(bytes: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut s = String::with_capacity(bytes.len() * 2);
    for &b in bytes {
        // High nibble first, then low nibble.
        s.push(char::from(HEX_DIGITS[usize::from(b >> 4)]));
        s.push(char::from(HEX_DIGITS[usize::from(b & 0x0f)]));
    }
    s
}
// Unit tests for layer construction. Each test builds its fixture in a fresh
// temporary directory (or in memory) and checks the resulting layer or entries.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::tempdir;
// Building the same directory twice with the same prefix must give identical
// digest, diff_id and compressed bytes.
#[test]
fn reproducible_same_inputs_same_digest() {
let dir = tempdir().unwrap();
fs::create_dir_all(dir.path().join("bin")).unwrap();
fs::write(dir.path().join("bin/hello"), b"#!/bin/sh\necho hi\n").unwrap();
fs::write(dir.path().join("README"), b"hello\n").unwrap();
let a = build_layer_from_dir(dir.path(), "mise/installs/test/1.0").unwrap();
let b = build_layer_from_dir(dir.path(), "mise/installs/test/1.0").unwrap();
assert_eq!(a.digest, b.digest, "digests should match across runs");
assert_eq!(a.diff_id, b.diff_id, "diff_ids should match across runs");
assert_eq!(a.bytes, b.bytes, "bytes should match across runs");
}
// The target prefix is part of the archived paths, so changing it must change the digest.
#[test]
fn different_prefix_different_digest() {
let dir = tempdir().unwrap();
fs::write(dir.path().join("x"), b"x").unwrap();
let a = build_layer_from_dir(dir.path(), "a").unwrap();
let b = build_layer_from_dir(dir.path(), "b").unwrap();
assert_ne!(a.digest, b.digest);
}
// The in-memory builder must also produce byte-identical output for identical input.
#[test]
fn files_layer_is_reproducible() {
let files = vec![
("etc/mise/config.toml".to_string(), b"foo\n".to_vec(), 0o644),
(
"usr/local/bin/mise".to_string(),
b"#!/bin/sh\nexec true\n".to_vec(),
0o755,
),
];
let a = build_layer_from_files(&files).unwrap();
let b = build_layer_from_files(&files).unwrap();
assert_eq!(a.bytes, b.bytes);
}
// An absolute symlink pointing inside the walked tree is rewritten relative to the
// link's directory; an absolute symlink pointing outside the tree is kept as-is.
#[cfg(unix)]
#[test]
fn absolute_intra_tree_symlinks_become_relative() {
use std::os::unix::fs::symlink;
let dir = tempdir().unwrap();
let src = dir.path();
fs::create_dir_all(src.join("bin")).unwrap();
fs::create_dir_all(src.join("lib/node_modules/npm/bin")).unwrap();
fs::write(
src.join("lib/node_modules/npm/bin/npm-cli.js"),
b"#!/bin/sh\n",
)
.unwrap();
// Use the canonical root so the link target is an absolute path inside the tree.
let canonical = std::fs::canonicalize(src).unwrap();
let target = canonical.join("lib/node_modules/npm/bin/npm-cli.js");
symlink(&target, src.join("bin/npm")).unwrap();
// Second link: absolute target outside the tree.
symlink("/usr/bin/false", src.join("bin/external")).unwrap();
let entries = collect_sorted_entries(src).unwrap();
let npm = entries
.iter()
.find(|e| e.rel == Path::new("bin/npm"))
.unwrap();
match &npm.kind {
EntryKind::Symlink(t) => {
assert!(
!t.is_absolute(),
"intra-tree symlink should have been rewritten to relative, got {t:?}",
);
// The link lives one directory deep, hence a single leading "..".
assert_eq!(t, &PathBuf::from("../lib/node_modules/npm/bin/npm-cli.js"),);
}
k => panic!("expected symlink, got {k:?}"),
}
let external = entries
.iter()
.find(|e| e.rel == Path::new("bin/external"))
.unwrap();
match &external.kind {
EntryKind::Symlink(t) => assert_eq!(t, &PathBuf::from("/usr/bin/false")),
k => panic!("expected symlink, got {k:?}"),
}
}
}