#![cfg(target_os = "macos")]
use std::os::unix::fs::{FileExt, MetadataExt};
use std::path::Path;
/// Outcome of one successful [`dedup_against`] run.
#[derive(Debug, Clone, Copy, Default)]
pub struct DedupStats {
    /// `st_blocks * 512` of the target file, sampled before it was replaced.
    pub apparent_before: u64,
    /// `st_blocks * 512` of the target file, sampled after the replacement
    /// was renamed into place.
    pub apparent_after: u64,
    /// Number of 4096-byte pages that differed from the canonical file and
    /// were therefore written into the clone.
    pub diff_pages_written: u64,
}
/// Everything that can go wrong while deduplicating one file against another.
///
/// Variants that wrap an [`std::io::Error`] are named after the operation
/// that failed; their `Display` output is `"<operation>: <os error>"`.
#[derive(Debug)]
pub enum DedupError {
    /// The two files (or a file and the length it was expected to have when
    /// being mapped) differ in size.
    SizeMismatch { canon: u64, target: u64 },
    /// Reading file metadata failed.
    Stat(std::io::Error),
    /// The `clonefile` call returned a non-zero status.
    Clonefile(std::io::Error),
    /// Opening a file for mapping, or the `mmap` call itself, failed.
    Mmap(std::io::Error),
    /// Opening the temporary clone for writing, or writing a page to it,
    /// failed.
    Write(std::io::Error),
    /// Flushing the temporary clone to disk failed.
    Sync(std::io::Error),
    /// Renaming the temporary clone over the target failed.
    Rename(std::io::Error),
    /// A path contained an interior NUL byte and could not be passed to C.
    PathNul,
    /// The file is zero bytes long; there is nothing to deduplicate or map.
    EmptyFile,
}

impl std::fmt::Display for DedupError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Payload-free variants are fixed messages.
            DedupError::PathNul => f.write_str("path contains NUL byte"),
            DedupError::EmptyFile => f.write_str("empty file"),

            DedupError::SizeMismatch { canon, target } => {
                write!(f, "size mismatch: canon={canon} target={target}")
            }

            // I/O-backed variants: operation label followed by the OS error.
            DedupError::Stat(e) => write!(f, "stat: {e}"),
            DedupError::Clonefile(e) => write!(f, "clonefile: {e}"),
            DedupError::Mmap(e) => write!(f, "mmap: {e}"),
            DedupError::Write(e) => write!(f, "pwrite: {e}"),
            DedupError::Sync(e) => write!(f, "fsync: {e}"),
            DedupError::Rename(e) => write!(f, "rename: {e}"),
        }
    }
}

impl std::error::Error for DedupError {}
/// Replaces `target` with a clone of `canon` that has been patched to hold
/// `target`'s exact contents.
///
/// Steps:
/// 1. Check that both files exist, are the same length, and are non-empty.
/// 2. `clonefile` `canon` to a temporary sibling of `target`
///    (`<target stem>.snap.dedup.tmp`), removing any stale file of that name
///    first.
/// 3. Compare `target` and the clone in 4096-byte pages and write every page
///    that differs into the clone.
/// 4. Flush the clone and rename it over `target`.
///
/// Returns the target's `st_blocks * 512` before and after, plus the number
/// of pages that had to be written.
///
/// # Errors
///
/// Returns a [`DedupError`] naming the step that failed. If anything fails
/// after the temporary clone has been created, the clone is removed
/// (best-effort) so a failed run does not leave a stray file behind; `target`
/// itself is only ever replaced by the final rename.
pub fn dedup_against(canon: &Path, target: &Path) -> Result<DedupStats, DedupError> {
    /// Comparison / rewrite granularity in bytes.
    const PAGE: usize = 4096;

    let canon_md = std::fs::metadata(canon).map_err(DedupError::Stat)?;
    let target_md = std::fs::metadata(target).map_err(DedupError::Stat)?;
    let file_len = canon_md.len();
    if file_len != target_md.len() {
        return Err(DedupError::SizeMismatch {
            canon: file_len,
            target: target_md.len(),
        });
    }
    if file_len == 0 {
        return Err(DedupError::EmptyFile);
    }
    let apparent_before = target_md.blocks().saturating_mul(512);

    let tmp_path = target.with_extension("snap.dedup.tmp");
    // A leftover from an interrupted run would make clonefile fail; the
    // result is ignored because the file normally does not exist.
    let _ = std::fs::remove_file(&tmp_path);
    {
        use std::os::unix::ffi::OsStrExt;
        let canon_c = std::ffi::CString::new(canon.as_os_str().as_bytes())
            .map_err(|_| DedupError::PathNul)?;
        let tmp_c = std::ffi::CString::new(tmp_path.as_os_str().as_bytes())
            .map_err(|_| DedupError::PathNul)?;
        // SAFETY: both pointers come from live `CString`s, so they are valid,
        // NUL-terminated, and outlive the call.
        let ret = unsafe { libc::clonefile(canon_c.as_ptr(), tmp_c.as_ptr(), 0) };
        if ret != 0 {
            return Err(DedupError::Clonefile(std::io::Error::last_os_error()));
        }
    }

    // From here on the temporary clone exists on disk. Every fallible step up
    // to and including the rename lives in this closure so that a single
    // error path below can remove the clone again.
    let patch_and_swap = || -> Result<u64, DedupError> {
        let target_data = mmap_readonly(target, file_len)?;
        let tmp_data = mmap_readonly(&tmp_path, file_len)?;
        let tmp_file = std::fs::OpenOptions::new()
            .write(true)
            .open(&tmp_path)
            .map_err(DedupError::Write)?;

        // Both mappings were length-checked against `file_len`, so the two
        // chunk sequences line up page for page (including a short tail).
        let mut diff_pages: u64 = 0;
        let pages = target_data.chunks(PAGE).zip(tmp_data.chunks(PAGE));
        for (index, (wanted, cloned)) in pages.enumerate() {
            if wanted != cloned {
                tmp_file
                    .write_all_at(wanted, (index * PAGE) as u64)
                    .map_err(DedupError::Write)?;
                diff_pages += 1;
            }
        }
        tmp_file.sync_data().map_err(DedupError::Sync)?;

        // Release the mappings and the descriptor before the rename.
        drop(target_data);
        drop(tmp_data);
        drop(tmp_file);
        std::fs::rename(&tmp_path, target).map_err(DedupError::Rename)?;
        Ok(diff_pages)
    };

    let diff_pages = match patch_and_swap() {
        Ok(count) => count,
        Err(err) => {
            // Best-effort cleanup; the original error is what matters.
            let _ = std::fs::remove_file(&tmp_path);
            return Err(err);
        }
    };

    let new_md = std::fs::metadata(target).map_err(DedupError::Stat)?;
    Ok(DedupStats {
        apparent_before,
        apparent_after: new_md.blocks().saturating_mul(512),
        diff_pages_written: diff_pages,
    })
}
/// Picks the most recently modified `restore.snap` among the sibling
/// snapshot directories that are compatible with a freshly baked snapshot.
///
/// A subdirectory of `snapshots_dir` qualifies when:
/// - it is not the same directory as `fresh_snap_dir` (compared after
///   canonicalisation, when the fresh directory can be canonicalised),
/// - its `restore.snap` exists and has the same length as the fresh one,
/// - its `metadata.json` parses as JSON and its `image`, `memory_mib` and
///   `baked_by_version` fields equal the corresponding arguments.
///
/// Modification times are compared at whole-second resolution (an unreadable
/// mtime counts as 0); on a tie the directory encountered first wins.
///
/// Returns `Ok(None)` when the fresh `restore.snap` cannot be stat'ed or no
/// sibling qualifies.
///
/// # Errors
///
/// Only a failure to open `snapshots_dir` for listing is reported; problems
/// with individual entries just disqualify that entry.
pub fn find_best_sibling(
    snapshots_dir: &Path,
    fresh_snap_dir: &Path,
    image: &str,
    memory_mib: u32,
    baked_by_version: &str,
) -> std::io::Result<Option<std::path::PathBuf>> {
    let fresh_canonical = std::fs::canonicalize(fresh_snap_dir).ok();
    let Ok(fresh_md) = std::fs::metadata(fresh_snap_dir.join("restore.snap")) else {
        return Ok(None);
    };
    let fresh_size = fresh_md.len();
    let wanted_memory = u64::from(memory_mib);

    // Yields `(mtime_secs, path to restore.snap)` when `dir` qualifies.
    let evaluate = |dir: std::path::PathBuf| -> Option<(u64, std::path::PathBuf)> {
        if !dir.is_dir() {
            return None;
        }
        // Never offer the fresh snapshot as its own dedup source.
        if let Some(fresh) = &fresh_canonical {
            if std::fs::canonicalize(&dir).is_ok_and(|resolved| resolved == *fresh) {
                return None;
            }
        }

        let snap_path = dir.join("restore.snap");
        let snap_md = std::fs::metadata(&snap_path).ok()?;
        if snap_md.len() != fresh_size {
            return None;
        }

        let raw = std::fs::read_to_string(dir.join("metadata.json")).ok()?;
        let meta: serde_json::Value = serde_json::from_str(&raw).ok()?;
        let compatible = meta.get("image").and_then(|v| v.as_str()) == Some(image)
            && meta.get("memory_mib").and_then(|v| v.as_u64()) == Some(wanted_memory)
            && meta.get("baked_by_version").and_then(|v| v.as_str()) == Some(baked_by_version);
        if !compatible {
            return None;
        }

        let mtime_secs = snap_md
            .modified()
            .ok()
            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
            .map_or(0, |d| d.as_secs());
        Some((mtime_secs, snap_path))
    };

    let newest = std::fs::read_dir(snapshots_dir)?
        .flatten()
        .filter_map(|entry| evaluate(entry.path()))
        // Strictly-greater comparison keeps the earlier entry on ties.
        .reduce(|best, cand| if cand.0 > best.0 { cand } else { best });

    Ok(newest.map(|(_, path)| path))
}
/// Best-effort deduplication of a freshly baked snapshot against the best
/// matching sibling, as chosen by [`find_best_sibling`].
///
/// Does nothing and returns `None` when the environment variable
/// `SUPERMACHINE_AUTO_DEDUP` is exactly `"0"`, when no sibling qualifies, or
/// when looking for one fails. Otherwise runs [`dedup_against`] on
/// `<fresh_snap_dir>/restore.snap` and returns its stats on success, `None`
/// on failure.
///
/// When the `dedup` or `bake` trace is enabled, a one-line summary of the
/// outcome (including elapsed time) is written to stderr.
pub fn auto_dedup_on_bake(
    snapshots_dir: &Path,
    fresh_snap_dir: &Path,
    image: &str,
    memory_mib: u32,
    baked_by_version: &str,
) -> Option<DedupStats> {
    // Explicit opt-out switch.
    if matches!(std::env::var("SUPERMACHINE_AUTO_DEDUP"), Ok(flag) if flag == "0") {
        return None;
    }

    // Lookup errors and "no candidate" are treated alike: skip dedup.
    let candidate = find_best_sibling(
        snapshots_dir,
        fresh_snap_dir,
        image,
        memory_mib,
        baked_by_version,
    )
    .ok()
    .flatten()?;

    let target = fresh_snap_dir.join("restore.snap");
    let t0 = std::time::Instant::now();
    let outcome = dedup_against(&candidate, &target);

    let trace = crate::trace::enabled("dedup") || crate::trace::enabled("bake");
    if trace {
        match &outcome {
            Ok(stats) => eprintln!(
                "[auto-dedup] OK in {:?}: {} → {} (diff_pages={}; canonical={})",
                t0.elapsed(),
                fmt_bytes(stats.apparent_before),
                fmt_bytes(stats.apparent_after),
                stats.diff_pages_written,
                candidate.display()
            ),
            Err(e) => eprintln!(
                "[auto-dedup] FAIL in {:?}: {e} (canonical={}); fresh snapshot retained",
                t0.elapsed(),
                candidate.display()
            ),
        }
    }

    outcome.ok()
}
/// Renders a byte count using binary units for log output.
///
/// Values of at least 1 GiB get one decimal place; MiB and KiB values are
/// rounded to whole numbers; anything smaller is printed as plain bytes.
fn fmt_bytes(n: u64) -> String {
    const KIB: u64 = 1 << 10;
    const MIB: u64 = 1 << 20;
    const GIB: u64 = 1 << 30;

    let in_units_of = |unit: u64| n as f64 / unit as f64;

    match n {
        big if big >= GIB => format!("{:.1} GiB", in_units_of(GIB)),
        mid if mid >= MIB => format!("{:.0} MiB", in_units_of(MIB)),
        small if small >= KIB => format!("{:.0} KiB", in_units_of(KIB)),
        _ => format!("{n} B"),
    }
}
/// Owning handle for a memory-mapped byte region; the region is unmapped
/// when the handle is dropped. Dereferences to `[u8]`.
struct Mmap {
    /// Start address of the mapping.
    ptr: *const u8,
    /// Length of the mapping in bytes.
    len: usize,
}

impl std::ops::Deref for Mmap {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        let (base, byte_count) = (self.ptr, self.len);
        // SAFETY: relies on `ptr`/`len` describing a live mapping of at least
        // `len` readable bytes, which holds for values built by
        // `mmap_readonly` in this file; the mapping stays valid until `drop`.
        unsafe { std::slice::from_raw_parts(base, byte_count) }
    }
}

impl Drop for Mmap {
    fn drop(&mut self) {
        let base = self.ptr.cast_mut().cast::<libc::c_void>();
        // SAFETY: `base`/`len` are the address and length this handle was
        // created with, and the handle is being destroyed, so nothing can
        // observe the region afterwards. The return value is ignored: there
        // is no useful recovery from a failed unmap here.
        unsafe {
            libc::munmap(base, self.len);
        }
    }
}

// SAFETY: the handle only ever hands out shared `&[u8]` views and performs no
// writes through `ptr`, so moving it to, or sharing it with, another thread
// introduces no data race on the Rust side.
unsafe impl Send for Mmap {}
unsafe impl Sync for Mmap {}
/// Maps the whole of `path` read-only (`PROT_READ`, `MAP_PRIVATE`).
///
/// The file's current length must equal `expected_len` and be non-zero.
///
/// # Errors
///
/// - [`DedupError::Mmap`] if the file cannot be opened or `mmap` fails.
/// - [`DedupError::Stat`] if its metadata cannot be read.
/// - [`DedupError::SizeMismatch`] if its length differs from `expected_len`
///   (`canon` carries the expected length, `target` the actual one).
/// - [`DedupError::EmptyFile`] if the length is zero.
fn mmap_readonly(path: &Path, expected_len: u64) -> Result<Mmap, DedupError> {
    use std::os::unix::io::AsRawFd;

    let file = std::fs::File::open(path).map_err(DedupError::Mmap)?;
    let actual_len = file.metadata().map_err(DedupError::Stat)?.len();
    if actual_len != expected_len {
        return Err(DedupError::SizeMismatch {
            canon: expected_len,
            target: actual_len,
        });
    }

    let len = actual_len as usize;
    if len == 0 {
        return Err(DedupError::EmptyFile);
    }

    let fd = file.as_raw_fd();
    // SAFETY: `fd` belongs to `file`, which is open for the duration of the
    // call; a null hint lets the kernel choose the address; `len` is non-zero.
    let addr = unsafe {
        libc::mmap(
            std::ptr::null_mut(),
            len,
            libc::PROT_READ,
            libc::MAP_PRIVATE,
            fd,
            0,
        )
    };
    if addr == libc::MAP_FAILED {
        return Err(DedupError::Mmap(std::io::Error::last_os_error()));
    }

    // `file` is closed when it goes out of scope; the returned handle keeps
    // only the address and length.
    Ok(Mmap {
        ptr: addr.cast::<u8>().cast_const(),
        len,
    })
}