use std::io::{Read as _, Seek as _, SeekFrom};
use std::path::Path;
use arcbox_constants::paths::{
CNI_DATA_MOUNT_POINT, CONTAINERD_DATA_MOUNT_POINT, DOCKER_DATA_MOUNT_POINT,
K3S_DATA_MOUNT_POINT, KUBELET_DATA_MOUNT_POINT,
};
use super::cmdline::docker_data_device;
/// Magic bytes identifying a Btrfs superblock (ASCII `_BHRfS_M`).
const BTRFS_MAGIC: [u8; 8] = [0x5f, 0x42, 0x48, 0x52, 0x66, 0x53, 0x5f, 0x4d];

/// Absolute byte offset on the device at which [`BTRFS_MAGIC`] is expected:
/// 0x40 bytes into the superblock that starts at 64 KiB (0x10000).
const BTRFS_MAGIC_OFFSET: u64 = 0x10040;

/// Directory where the top level of the data filesystem is mounted so that it
/// can be resized and its subvolumes created.
const BTRFS_TEMP_MOUNT: &str = "/run/arcbox/data";

/// Reports whether `device` carries the Btrfs magic at [`BTRFS_MAGIC_OFFSET`].
///
/// Any I/O failure (the device cannot be opened, the seek fails, or fewer than
/// eight bytes can be read at the offset) is reported as `false`, exactly like
/// a magic mismatch.
fn has_btrfs_superblock(device: &str) -> bool {
    // Collect the fallible steps in one place so `?` can be used; the error
    // itself is intentionally discarded below.
    let read_magic = || -> std::io::Result<[u8; 8]> {
        let mut dev = std::fs::File::open(device)?;
        dev.seek(SeekFrom::Start(BTRFS_MAGIC_OFFSET))?;
        let mut found = [0_u8; 8];
        dev.read_exact(&mut found)?;
        Ok(found)
    };

    matches!(read_magic(), Ok(found) if found == BTRFS_MAGIC)
}
/// Makes sure `device` holds a Btrfs filesystem, formatting it when needed.
///
/// Returns a human-readable note describing what happened: either the device
/// already had a Btrfs superblock, or it was freshly formatted.
///
/// # Errors
///
/// Returns a message when `/sbin/mkfs.btrfs` is missing, cannot be executed,
/// or exits unsuccessfully.
///
/// NOTE(review): `has_btrfs_superblock` answers `false` for read errors as
/// well as for a missing magic, so a device that cannot be probed goes straight
/// to `mkfs.btrfs -f`. Confirm that this is acceptable for existing data.
fn ensure_btrfs_format(device: &str) -> Result<String, String> {
    const MKFS_BTRFS: &str = "/sbin/mkfs.btrfs";

    if has_btrfs_superblock(device) {
        return Ok("data device already Btrfs".to_string());
    }
    if !Path::new(MKFS_BTRFS).exists() {
        return Err(format!("{} not found in EROFS rootfs", MKFS_BTRFS));
    }

    // `-f` forces creation even if mkfs detects an existing signature.
    let status = std::process::Command::new(MKFS_BTRFS)
        .arg("-f")
        .arg(device)
        .status()
        .map_err(|e| format!("failed to execute mkfs.btrfs: {}", e))?;

    if status.success() {
        Ok(format!("formatted {} as Btrfs", device))
    } else {
        // `code()` is `None` when the process was killed by a signal; report -1 then.
        Err(format!(
            "mkfs.btrfs failed on {} (exit={})",
            device,
            status.code().unwrap_or(-1)
        ))
    }
}
/// Polls until the data block device node appears.
///
/// Checks every 100 ms and gives up after 50 waits (5 s in total).
fn wait_for_data_device(device: &str) -> Result<(), String> {
    let mut attempts = 0;
    while !Path::new(device).exists() {
        attempts += 1;
        if attempts > 50 {
            return Err(format!("data device {} not available after 5 s", device));
        }
        std::thread::sleep(std::time::Duration::from_millis(100));
    }
    if attempts > 0 {
        tracing::info!(device, attempts, "waited for data device");
    }
    Ok(())
}

/// Runs `busybox mount -t btrfs -o <options> <device> <target>` and returns its exit status.
///
/// Only a failure to spawn the process is turned into an error here; callers
/// inspect the status so they can report a context-specific message.
fn run_btrfs_mount(
    options: &str,
    device: &str,
    target: &str,
) -> Result<std::process::ExitStatus, String> {
    std::process::Command::new("/bin/busybox")
        .args(["mount", "-t", "btrfs", "-o", options, device, target])
        .status()
        .map_err(|e| format!("mount exec failed: {}", e))
}

/// Mounts the top level of the data filesystem at `BTRFS_TEMP_MOUNT` unless it is already mounted.
fn mount_data_top_level(device: &str) -> Result<(), String> {
    if crate::mount::is_mounted(BTRFS_TEMP_MOUNT) {
        return Ok(());
    }
    std::fs::create_dir_all(BTRFS_TEMP_MOUNT)
        .map_err(|e| format!("failed to create {}: {}", BTRFS_TEMP_MOUNT, e))?;

    let status = run_btrfs_mount("compress=zstd:3,discard=async", device, BTRFS_TEMP_MOUNT)?;
    if status.success() {
        Ok(())
    } else {
        Err(format!(
            "mount -t btrfs {} {} failed (exit={})",
            device,
            BTRFS_TEMP_MOUNT,
            status.code().unwrap_or(-1)
        ))
    }
}

/// Ensures the data device is formatted as Btrfs and that every data subvolume
/// is mounted at its mount point.
///
/// Steps, in order:
/// 1. Return early if all five mount points are already mounted.
/// 2. Wait for the data device node, then format it if it has no Btrfs superblock.
/// 3. Mount the filesystem's top level at `BTRFS_TEMP_MOUNT` and try to grow it
///    to the device size (a resize failure is logged and noted, not fatal).
/// 4. Create any missing subvolume, then mount each subvolume that is not yet
///    mounted and mark its metadata directories NOCOW.
///
/// Returns a `"; "`-joined summary of the actions taken.
///
/// # Errors
///
/// Returns a message when the device never appears, formatting fails, a
/// directory cannot be created, a subvolume cannot be created, or a mount fails.
pub(super) fn ensure_data_mount() -> Result<String, String> {
    // Single table driving the "already mounted" check, subvolume creation and
    // the per-subvolume mounts, so the three can never drift apart.
    let data_subvolumes = [
        ("@docker", DOCKER_DATA_MOUNT_POINT),
        ("@containerd", CONTAINERD_DATA_MOUNT_POINT),
        ("@k3s", K3S_DATA_MOUNT_POINT),
        ("@kubelet", KUBELET_DATA_MOUNT_POINT),
        ("@cni", CNI_DATA_MOUNT_POINT),
    ];

    if data_subvolumes
        .iter()
        .all(|&(_, target)| crate::mount::is_mounted(target))
    {
        return Ok("data subvolumes already mounted".to_string());
    }

    let device = docker_data_device();
    wait_for_data_device(&device)?;

    let format_note = ensure_btrfs_format(&device)?;
    tracing::info!("{}", format_note);

    mount_data_top_level(&device)?;

    let mut notes = Vec::new();
    // Growing the filesystem is best-effort: record the failure and carry on.
    match btrfs_resize_max(BTRFS_TEMP_MOUNT) {
        Ok(note) => notes.push(note),
        Err(e) => {
            tracing::error!(error = %e, "btrfs resize max failed");
            notes.push(format!("resize failed: {}", e));
        }
    }

    // Create every missing subvolume before mounting any of them.
    for (subvol, _) in data_subvolumes {
        let subvol_path = format!("{}/{}", BTRFS_TEMP_MOUNT, subvol);
        if Path::new(&subvol_path).exists() {
            continue;
        }
        btrfs_create_subvolume(&subvol_path)
            .map_err(|e| format!("failed to create subvolume {}: {}", subvol, e))?;
    }

    for (subvol, target) in data_subvolumes {
        if crate::mount::is_mounted(target) {
            continue;
        }
        std::fs::create_dir_all(target)
            .map_err(|e| format!("failed to create {}: {}", target, e))?;

        let opts = format!(
            "compress=zstd:1,discard=async,noatime,space_cache=v2,subvol={}",
            subvol
        );
        let status = run_btrfs_mount(&opts, &device, target)?;
        if !status.success() {
            return Err(format!(
                "mount subvol={} {} failed (exit={})",
                subvol,
                target,
                status.code().unwrap_or(-1)
            ));
        }
        disable_cow_on_metadata_dirs(target);
        notes.push(format!("mounted {} -> {}", subvol, target));
    }

    // `notes` always holds at least the resize entry pushed above, so there is
    // no empty case to handle here.
    Ok(notes.join("; "))
}
/// Creates a fixed set of metadata directories under `mount_point` and sets
/// the NOCOW inode flag on each of them.
///
/// Entirely best-effort: a directory that cannot be created or opened, or an
/// ioctl that fails, is skipped without reporting an error.
fn disable_cow_on_metadata_dirs(mount_point: &str) {
    // `FS_NOCOW_FL` inode flag from <linux/fs.h>.
    const FS_NOCOW_FL: libc::c_long = 0x0080_0000;
    // `FS_IOC_GETFLAGS` / `FS_IOC_SETFLAGS` request numbers. The values encode
    // an 8-byte argument, i.e. they assume a 64-bit `long`.
    #[allow(clippy::cast_possible_wrap)]
    const FS_IOC_GETFLAGS: libc::Ioctl = 0x8008_6601u32 as libc::Ioctl;
    #[allow(clippy::cast_possible_wrap)]
    const FS_IOC_SETFLAGS: libc::Ioctl = 0x4008_6602u32 as libc::Ioctl;

    for subdir in [
        "io.containerd.metadata.v1.bolt",
        "io.containerd.snapshotter.v1.overlayfs",
        "containerd",
        "network",
        "builder",
        "buildkit",
        "image",
        "trust",
    ] {
        let dir_path = format!("{}/{}", mount_point, subdir);
        // Ignore the result: if the directory is missing, the open below fails
        // and the entry is skipped.
        let _ = std::fs::create_dir_all(&dir_path);

        let Ok(c_dir_path) = std::ffi::CString::new(dir_path.as_str()) else {
            continue;
        };

        // SAFETY: `c_dir_path` is a valid NUL-terminated string that outlives the call.
        let fd = unsafe { libc::open(c_dir_path.as_ptr(), libc::O_RDONLY | libc::O_DIRECTORY) };
        if fd < 0 {
            continue;
        }

        let mut inode_flags: libc::c_long = 0;
        // SAFETY: `fd` is an open descriptor owned by this iteration and
        // `inode_flags` is a live, writable `c_long` for the kernel to fill in.
        let fetched = unsafe { libc::ioctl(fd, FS_IOC_GETFLAGS, &mut inode_flags) } == 0;
        if fetched {
            inode_flags |= FS_NOCOW_FL;
            // SAFETY: `fd` is still open and `inode_flags` is a live, readable `c_long`.
            let applied = unsafe { libc::ioctl(fd, FS_IOC_SETFLAGS, &inode_flags) } == 0;
            if applied {
                tracing::debug!("set NOCOW on {}", dir_path);
            }
        }

        // SAFETY: `fd` came from the successful `open` above and is closed exactly once.
        unsafe {
            libc::close(fd);
        }
    }
}
// Btrfs ioctl wrappers generated by `nix`. Both use ioctl type 0x94 (the Btrfs
// ioctl magic) and pass a 4096-byte buffer: 8 leading bytes followed by a
// 4088-byte NUL-terminated name/argument string, as built by the callers below.
//
// Request number 14: create a subvolume named by the buffer inside the
// directory referred to by the file descriptor.
nix::ioctl_write_ptr!(btrfs_ioc_subvol_create, 0x94, 14, [u8; 4096]);
// Request number 3: resize the filesystem mounted at the file descriptor
// according to the size token in the buffer (e.g. "max").
nix::ioctl_write_ptr!(btrfs_ioc_resize, 0x94, 3, [u8; 4096]);
/// Builds the 4096-byte argument buffer passed to the Btrfs resize ioctl.
///
/// Layout: bytes `0..8` hold `devid` in little-endian order, and bytes `8..`
/// hold `size` followed by zero padding, so the token is always NUL-terminated.
///
/// # Errors
///
/// Returns an error when `size` is 4088 bytes or longer, which would leave no
/// room for the terminating NUL.
///
/// NOTE(review): the kernel's resize handler appears to take the device id
/// from a `devid:` prefix inside the name string rather than from the first
/// eight bytes. Confirm whether `devid` values other than 1 have any effect.
fn build_resize_args(devid: i64, size: &str) -> Result<[u8; 4096], String> {
    const HEADER_LEN: usize = 8;
    const NAME_CAPACITY: usize = 4096 - HEADER_LEN;

    let token = size.as_bytes();
    if token.len() >= NAME_CAPACITY {
        return Err("resize size token too long".to_string());
    }

    let mut buffer = [0u8; 4096];
    let (header, name) = buffer.split_at_mut(HEADER_LEN);
    header.copy_from_slice(&devid.to_le_bytes());
    name[..token.len()].copy_from_slice(token);
    Ok(buffer)
}
/// Grows the Btrfs filesystem mounted at `mount_point` to the full size of its
/// device by issuing the resize ioctl with the `max` token for device id 1.
///
/// Returns a short note on success.
///
/// # Errors
///
/// Returns a message when `mount_point` cannot be opened, the argument buffer
/// cannot be built, or the ioctl itself fails.
fn btrfs_resize_max(mount_point: &str) -> Result<String, String> {
    use std::os::unix::io::AsRawFd;

    let mount_dir = match std::fs::File::open(mount_point) {
        Ok(handle) => handle,
        Err(e) => return Err(format!("open {} for resize: {}", mount_point, e)),
    };
    let request = build_resize_args(1, "max")?;

    // SAFETY: `mount_dir` keeps the descriptor open for the duration of the
    // call and `request` is a fully initialised 4096-byte buffer.
    let outcome = unsafe { btrfs_ioc_resize(mount_dir.as_raw_fd(), &request) };
    if let Err(e) = outcome {
        return Err(format!("BTRFS_IOC_RESIZE max on {}: {}", mount_point, e));
    }

    tracing::info!(mount_point, "btrfs resize max succeeded");
    Ok(format!("resized {} to device max", mount_point))
}
/// Creates the Btrfs subvolume `path` by issuing the subvolume-create ioctl on
/// the parent directory with the final path component as the subvolume name.
///
/// # Errors
///
/// Returns a message when `path` has no parent or no final component, the name
/// is not valid UTF-8 or is 4088 bytes or longer, the parent directory cannot
/// be opened, or the ioctl fails.
fn btrfs_create_subvolume(path: &str) -> Result<(), String> {
    use std::os::unix::io::AsRawFd;

    // Bytes available for the name in the ioctl buffer, including the NUL.
    const NAME_CAPACITY: usize = 4088;

    let target = Path::new(path);
    let parent = match target.parent() {
        // `Path::parent` yields an empty path for a bare relative name such as
        // "@docker"; opening "" fails, so resolve it to the current directory.
        Some(dir) if dir.as_os_str().is_empty() => Path::new("."),
        Some(dir) => dir,
        None => return Err("no parent directory".to_string()),
    };
    let name = target
        .file_name()
        .ok_or_else(|| "no subvolume name".to_string())?
        .to_str()
        .ok_or_else(|| "invalid subvolume name".to_string())?;

    // Validate before touching the filesystem so no descriptor is opened for a
    // request that can never be issued.
    let name_bytes = name.as_bytes();
    if name_bytes.len() >= NAME_CAPACITY {
        return Err("subvolume name too long".to_string());
    }

    let parent_dir =
        std::fs::File::open(parent).map_err(|e| format!("open {}: {}", parent.display(), e))?;

    // The first 8 bytes stay zero; the name follows and is NUL-terminated by
    // the zero padding.
    let mut args = [0u8; 4096];
    args[8..8 + name_bytes.len()].copy_from_slice(name_bytes);

    // SAFETY: `parent_dir` keeps the descriptor open for the duration of the
    // call and `args` is a fully initialised 4096-byte buffer.
    unsafe { btrfs_ioc_subvol_create(parent_dir.as_raw_fd(), &args) }
        .map_err(|e| format!("BTRFS_IOC_SUBVOL_CREATE: {}", e))?;

    tracing::info!("created Btrfs subvolume {}", path);
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::build_resize_args;

    /// The device id occupies the first eight bytes (little-endian) and the
    /// size token follows immediately, padded with zeros.
    #[test]
    fn resize_args_layout_matches_kernel_struct() {
        let args = build_resize_args(1, "max").unwrap();
        assert_eq!(&args[0..8], &1i64.to_le_bytes());
        assert_eq!(&args[8..11], b"max");
        assert_eq!(args[11], 0);
        assert!(args[12..].iter().all(|b| *b == 0));
    }

    /// A token that would fill the whole name field leaves no room for the
    /// terminating NUL and must be rejected.
    #[test]
    fn resize_args_rejects_oversized_token() {
        let huge = "x".repeat(4088);
        assert!(build_resize_args(1, &huge).is_err());
    }

    /// The longest accepted token is one byte shorter than the name field, so
    /// the final byte of the buffer remains the NUL terminator.
    #[test]
    fn resize_args_accepts_longest_token_and_keeps_nul() {
        let longest = "x".repeat(4087);
        let args = build_resize_args(1, &longest).unwrap();
        assert!(args[8..4095].iter().all(|b| *b == b'x'));
        assert_eq!(args[4095], 0);
    }
}