use std::collections::{HashMap, HashSet};
use std::env;
use std::path::Path;
use std::process::{Command, Stdio};
use std::sync::OnceLock;
use anyhow::{Context, Result, anyhow};
use camino::{Utf8Path, Utf8PathBuf};
use cap_std_ext::cap_std::fs::Dir;
use fn_error_context::context;
use serde::Deserialize;
use bootc_utils::CommandRunExt;
/// Reports whether the udev database directory (`/run/udev/data`) exists.
///
/// The filesystem is checked on the first call only; the answer is cached in
/// a process-wide `OnceLock` and reused afterwards. When the directory is
/// missing, a single debug message is logged.
fn have_udev() -> bool {
    static UDEV_PRESENT: OnceLock<bool> = OnceLock::new();

    // One-shot probe, run at most once through `get_or_init`.
    fn probe() -> bool {
        let present = Path::new("/run/udev/data").exists();
        if !present {
            tracing::debug!(
                "udev database not available, will use blkid -p for partition metadata"
            );
        }
        present
    }

    *UDEV_PRESENT.get_or_init(probe)
}
/// Runs `blkid -p -o export <dev>` and returns its `KEY=value` output as a map.
///
/// Exit status 2 is treated as "nothing to report" and yields an empty map.
/// Any other unsuccessful exit is an error that carries blkid's stderr.
/// Output lines without an `=` are ignored.
fn blkid_probe(dev: &str) -> Result<HashMap<String, String>> {
    let mut probe = Command::new("blkid");
    probe.args(["-p", "-o", "export"]).arg(dev);
    probe.log_debug();
    let output = probe.output().context("Failed to run blkid")?;

    let status = output.status;
    match (status.success(), status.code()) {
        (true, _) => {}
        (false, Some(2)) => return Ok(HashMap::new()),
        (false, _) => {
            let stderr = String::from_utf8_lossy(&output.stderr);
            anyhow::bail!(
                "blkid -p failed on {dev} (exit status {}): {stderr}",
                status
            );
        }
    }

    let text = String::from_utf8(output.stdout).context("blkid output is not UTF-8")?;
    // Collecting into a map keeps the last value for a repeated key, just
    // like successive `insert` calls would.
    let props = text
        .lines()
        .filter_map(|line| line.split_once('='))
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect();
    Ok(props)
}
/// MBR partition type IDs that `Device::find_partition_of_esp_optional`
/// accepts as an ESP on a `dos` partition table.
pub const ESP_ID_MBR: &[u8] = &[0x06, 0xEF];
/// GPT partition type GUID matched (case-insensitively) when looking for the ESP.
pub const ESP: &str = "c12a7328-f81f-11d2-ba4b-00a0c93ec93b";
/// GPT partition type GUID matched (case-insensitively) when looking for the
/// BIOS boot partition.
pub const BIOS_BOOT: &str = "21686148-6449-6e6f-744e-656564454649";
/// Top-level shape of the JSON document parsed from `lsblk -J` output.
#[derive(Debug, Deserialize)]
struct DevicesOutput {
// Filled from the `blockdevices` key of the JSON document.
blockdevices: Vec<Device>,
}
/// One block device entry, deserialized from `lsblk -J` JSON.
///
/// Field names follow the lsblk JSON keys. Nested entries live in
/// `children`. Some optional fields can be filled in after parsing by
/// [`Device::backfill_missing`].
#[allow(dead_code)]
#[derive(Debug, Clone, serde::Serialize, Deserialize)]
pub struct Device {
/// Device name; used to build `/dev/<name>` when `path` is absent.
pub name: String,
pub serial: Option<String>,
pub model: Option<String>,
pub partlabel: Option<String>,
/// Partition type: compared against a GPT GUID, or parsed as a hex MBR id
/// (optionally `0x`-prefixed) on `dos` tables.
pub parttype: Option<String>,
pub partuuid: Option<String>,
/// Partition number; backfilled from the sysfs `partition` attribute when missing.
pub partn: Option<u32>,
/// Nested devices as reported by lsblk (parents when `--inverse` is used).
pub children: Option<Vec<Device>>,
/// Size as reported by lsblk (this file always invokes it with `-b`).
pub size: u64,
/// `major:minor` device number; used to locate `/sys/dev/block/<maj:min>/`.
#[serde(rename = "maj:min")]
pub maj_min: Option<String>,
/// Backfilled from the sysfs `start` attribute when missing.
pub start: Option<u64>,
pub label: Option<String>,
pub fstype: Option<String>,
pub uuid: Option<String>,
/// Device node path; see [`Device::path`] for the fallback.
pub path: Option<String>,
/// Partition table type; `dos` and `gpt` are the values ESP lookup understands.
pub pttype: Option<String>,
}
impl Device {
/// Returns the device node path.
///
/// Uses the `path` reported by lsblk when present, otherwise falls back to
/// `/dev/<name>`.
pub fn path(&self) -> String {
    // Build the fallback lazily so no string is formatted (and then thrown
    // away) when `path` is already known.
    self.path
        .clone()
        .unwrap_or_else(|| format!("/dev/{}", self.name))
}
#[allow(dead_code)]
pub fn node(&self) -> String {
self.path()
}
/// Returns `true` when `children` is present and holds at least one entry.
#[allow(dead_code)]
pub fn has_children(&self) -> bool {
    matches!(self.children.as_deref(), Some(kids) if !kids.is_empty())
}
/// Reports whether this device is a device-mapper multipath device.
///
/// The device path is canonicalized, and the final path component is used to
/// look up `/sys/class/block/<name>/dm/uuid`. The device counts as multipath
/// when that file exists and its contents start with `mpath-`.
///
/// # Errors
/// Fails if the path cannot be canonicalized, is not UTF-8, or the uuid file
/// exists but cannot be read.
pub fn is_mpath(&self) -> Result<bool> {
    let resolved = std::fs::canonicalize(self.path())?;
    let resolved =
        Utf8PathBuf::from_path_buf(resolved).map_err(|_| anyhow::anyhow!("Non-UTF8 path"))?;
    let kernel_name = resolved.file_name().unwrap_or("");
    let uuid_file = Utf8PathBuf::from(format!("/sys/class/block/{kernel_name}/dm/uuid"));

    // No dm/uuid attribute: not a device-mapper device at all.
    if !uuid_file.exists() {
        return Ok(false);
    }
    let contents = std::fs::read_to_string(&uuid_file)
        .with_context(|| format!("Failed to read {uuid_file}"))?;
    Ok(contents.trim_start().starts_with("mpath-"))
}
/// Returns the partition number of the ESP on this device as a decimal string.
///
/// The number is read from `/sys/class/block/<esp>/partition` when that
/// attribute exists. Otherwise, if this device is multipath, the `partn`
/// value from lsblk is used.
///
/// # Errors
/// Fails if no ESP is found, the sysfs attribute cannot be read, or neither
/// source provides a partition number.
pub fn get_esp_partition_number(&self) -> Result<String> {
    let esp_device = self.find_partition_of_esp()?;
    let devname = &esp_device.name;
    let partition_path = Utf8PathBuf::from(format!("/sys/class/block/{devname}/partition"));
    if partition_path.exists() {
        let raw = std::fs::read_to_string(&partition_path)
            .with_context(|| format!("Failed to read {partition_path}"))?;
        // sysfs attributes end with a newline; strip it so this branch
        // returns the same bare number as the multipath branch below.
        return Ok(raw.trim().to_owned());
    }
    if self.is_mpath()? {
        if let Some(partn) = esp_device.partn {
            return Ok(partn.to_string());
        }
    }
    anyhow::bail!("Not supported for {devname}")
}
/// Finds the first direct child whose partition type is the BIOS boot GUID.
pub fn find_partition_of_bios_boot(&self) -> Option<&Device> {
    let bios_boot_guid: &str = BIOS_BOOT;
    self.find_partition_of_type(bios_boot_guid)
}
/// Collects the ESP of every root device backing this device.
///
/// Roots without an ESP are skipped. Returns `None` when no root has one.
///
/// # Errors
/// Propagates failures from root discovery and from ESP lookup.
pub fn find_colocated_esps(&self) -> Result<Option<Vec<Device>>> {
    let roots = self.find_all_roots()?;
    let mut found: Vec<Device> = Vec::new();
    for root in roots.iter() {
        match root.find_partition_of_esp_optional()? {
            Some(esp) => found.push(esp.clone()),
            None => continue,
        }
    }
    if found.is_empty() {
        Ok(None)
    } else {
        Ok(Some(found))
    }
}
/// Returns the first ESP found among the root devices backing this device.
///
/// # Errors
/// Fails when root discovery or ESP lookup fails, or when no backing device
/// has an ESP.
pub fn find_first_colocated_esp(&self) -> Result<Device> {
    self.find_colocated_esps()?
        // Take the first element without `remove(0)`: an empty vector then
        // becomes the error below instead of a panic.
        .and_then(|esps| esps.into_iter().next())
        .ok_or_else(|| anyhow!("No ESP partition found among backing devices"))
}
/// Collects the BIOS boot partition of every root device backing this device.
///
/// Roots without one are skipped. Returns `None` when no root has one.
///
/// # Errors
/// Propagates failures from root discovery.
pub fn find_colocated_bios_boot(&self) -> Result<Option<Vec<Device>>> {
    let roots = self.find_all_roots()?;
    let mut found = Vec::new();
    for root in &roots {
        if let Some(partition) = root.find_partition_of_bios_boot() {
            found.push(partition.clone());
        }
    }
    Ok(if found.is_empty() { None } else { Some(found) })
}
/// Finds the first direct child whose `parttype` equals `parttype`, ignoring
/// ASCII case. Returns `None` when there are no children or no match.
pub fn find_partition_of_type(&self, parttype: &str) -> Option<&Device> {
    let partitions = self.children.as_deref()?;
    partitions.iter().find(|partition| {
        matches!(
            partition.parttype.as_deref(),
            Some(found) if found.eq_ignore_ascii_case(parttype)
        )
    })
}
/// Looks for an ESP beneath this device. Returns `Ok(None)` if there is none.
///
/// Direct children are checked first, according to this device's partition
/// table type:
/// - `dos`: the first child whose hex `parttype` is listed in [`ESP_ID_MBR`];
/// - `gpt` or unknown: the first child whose `parttype` is the [`ESP`] GUID.
///
/// If no direct child matches, each child that itself reports a partition
/// table type is searched recursively, in order.
///
/// # Errors
/// Fails when a partition table type other than `dos`/`gpt` is encountered.
pub fn find_partition_of_esp_optional(&self) -> Result<Option<&Device>> {
    // True when `partition.parttype` parses as a hex byte that is one of the
    // MBR ESP ids; a leading `0x` is optional.
    fn is_mbr_esp(partition: &Device) -> bool {
        let Some(raw) = partition.parttype.as_deref() else {
            return false;
        };
        let hex = raw.strip_prefix("0x").unwrap_or(raw);
        match u8::from_str_radix(hex, 16) {
            Ok(id) => ESP_ID_MBR.contains(&id),
            Err(_) => false,
        }
    }

    let children = match self.children.as_deref() {
        Some(children) => children,
        None => return Ok(None),
    };

    let direct = match self.pttype.as_deref() {
        Some("dos") => children.iter().find(|partition| is_mbr_esp(partition)),
        Some("gpt") | None => self.find_partition_of_type(ESP),
        Some(other) => return Err(anyhow!("Unsupported partition table type: {other}")),
    };
    if let Some(esp) = direct {
        return Ok(Some(esp));
    }

    // Nothing at this level: descend into children that report a table type.
    for nested in children.iter().filter(|child| child.pttype.is_some()) {
        if let Some(esp) = nested.find_partition_of_esp_optional()? {
            return Ok(Some(esp));
        }
    }
    Ok(None)
}
/// Like [`Device::find_partition_of_esp_optional`], but a missing ESP is an error.
pub fn find_partition_of_esp(&self) -> Result<&Device> {
    match self.find_partition_of_esp_optional()? {
        Some(esp) => Ok(esp),
        None => Err(anyhow!("ESP partition not found on {}", self.path())),
    }
}
/// Returns the direct child whose partition number equals `partno`.
///
/// # Errors
/// Fails when the device has no `children` list or no child has that number.
pub fn find_device_by_partno(&self, partno: u32) -> Result<&Device> {
    let Some(partitions) = self.children.as_deref() else {
        return Err(anyhow!("Device has no children"));
    };
    for partition in partitions {
        if partition.partn == Some(partno) {
            return Ok(partition);
        }
    }
    Err(anyhow!("Missing partition for index {partno}"))
}
/// Re-reads this device through [`list_dev`] and replaces `self` with the result.
///
/// On error `self` is left untouched.
pub fn refresh(&mut self) -> Result<()> {
    let current_path = self.path();
    *self = list_dev(Utf8Path::new(&current_path))?;
    Ok(())
}
/// Reads and parses `/sys/dev/block/<maj:min>/<property>`.
///
/// Returns `Ok(None)` when the device has no `maj:min` or the attribute does
/// not exist.
///
/// # Errors
/// Fails if the existence check or read fails, or if the trimmed contents do
/// not parse as `T`.
fn read_sysfs_property<T>(&self, property: &str) -> Result<Option<T>>
where
    T: std::str::FromStr,
    T::Err: std::error::Error + Send + Sync + 'static,
{
    let Some(majmin) = self.maj_min.as_deref() else {
        return Ok(None);
    };
    let sysfs_path = format!("/sys/dev/block/{majmin}/{property}");
    if !Utf8Path::new(&sysfs_path).try_exists()? {
        return Ok(None);
    }
    let raw = std::fs::read_to_string(&sysfs_path)
        .with_context(|| format!("Reading {sysfs_path}"))?;
    // Trim once and use the same text for parsing and logging, so the log
    // line does not carry the attribute's trailing newline.
    let value = raw.trim();
    let parsed = value
        .parse::<T>()
        .with_context(|| format!("Parsing sysfs {property} property"))?;
    tracing::debug!("backfilled {property} to {value}");
    Ok(Some(parsed))
}
/// Fills in fields that lsblk left empty, for this device and all descendants.
///
/// - `start` and `partn` come from the sysfs `start` / `partition` attributes.
/// - When the udev database is absent, `parttype` and `pttype` come from
///   `blkid -p` (`PART_ENTRY_TYPE` and `PTTYPE`).
///
/// Fields that already have a value are never overwritten.
///
/// # Errors
/// Propagates sysfs read/parse failures and blkid failures.
pub fn backfill_missing(&mut self) -> Result<()> {
    if self.start.is_none() {
        self.start = self.read_sysfs_property("start")?;
    }
    if self.partn.is_none() {
        self.partn = self.read_sysfs_property("partition")?;
    }

    let lacks_type_info = self.parttype.is_none() || self.pttype.is_none();
    if !have_udev() && lacks_type_info {
        let mut probed = blkid_probe(&self.path())?;
        if self.parttype.is_none() {
            self.parttype = probed.remove("PART_ENTRY_TYPE");
        }
        if self.pttype.is_none() {
            self.pttype = probed.remove("PTTYPE");
        }
    }

    if let Some(children) = self.children.as_mut() {
        for child in children.iter_mut() {
            child.backfill_missing()?;
        }
    }
    Ok(())
}
/// Lists the devices directly above this one, using `lsblk --inverse`.
///
/// The `children` of the first device in the inverse listing are returned,
/// after backfilling each one. Returns `None` when that list is absent or
/// empty.
///
/// # Errors
/// Fails if lsblk fails, prints no device, or backfilling fails.
pub fn list_parents(&self) -> Result<Option<Vec<Device>>> {
    let path = self.path();
    let listing: DevicesOutput = Command::new("lsblk")
        .args(["-J", "-b", "-O", "--inverse"])
        .arg(&path)
        .log_debug()
        .run_and_parse_json()?;

    let Some(device) = listing.blockdevices.into_iter().next() else {
        return Err(anyhow!("no device output from lsblk --inverse for {path}"));
    };

    let mut parents = device.children.unwrap_or_default();
    if parents.is_empty() {
        return Ok(None);
    }
    for parent in parents.iter_mut() {
        parent.backfill_missing()?;
    }
    Ok(Some(parents))
}
/// Returns the only root device backing this device.
///
/// # Errors
/// Fails if root discovery fails or the number of roots is not exactly one.
pub fn require_single_root(&self) -> Result<Device> {
    let mut roots = self.find_all_roots()?;
    let n = roots.len();
    if n != 1 {
        anyhow::bail!(
            "Expected a single root device for {}, but found {n}",
            self.path()
        );
    }
    Ok(roots.remove(0))
}
/// Finds every top-most device that backs this device.
///
/// The parent tree from [`Device::list_parents`] is walked until devices
/// with no further parents are reached. Each distinct one (by name) is
/// re-listed through [`list_dev`]. A device with no parents is its own root.
///
/// # Errors
/// Propagates failures from `list_parents` and `list_dev`.
pub fn find_all_roots(&self) -> Result<Vec<Device>> {
    let mut pending = match self.list_parents()? {
        Some(parents) => parents,
        None => {
            let own = list_dev(Utf8Path::new(&self.path()))?;
            return Ok(vec![own]);
        }
    };

    let mut visited: HashSet<String> = HashSet::new();
    let mut roots = Vec::new();
    // `pending` is used as a stack: pop one device, push its parents.
    while let Some(mut device) = pending.pop() {
        let ancestors = device.children.take().unwrap_or_default();
        if !ancestors.is_empty() {
            pending.extend(ancestors);
            continue;
        }
        // Top of the tree; the same root can be reached by several paths.
        if visited.insert(device.name.clone()) {
            roots.push(list_dev(Utf8Path::new(&device.path()))?);
        }
    }
    Ok(roots)
}
}
/// Describes `dev` by running `lsblk -J -b -O <dev>`.
///
/// Every device in the output is backfilled; the first one is returned.
///
/// # Errors
/// Fails if lsblk fails, backfilling fails, or lsblk prints no device.
#[context("Listing device {dev}")]
pub fn list_dev(dev: &Utf8Path) -> Result<Device> {
    let mut listing: DevicesOutput = Command::new("lsblk")
        .args(["-J", "-b", "-O"])
        .arg(dev)
        .log_debug()
        .run_and_parse_json()?;

    listing
        .blockdevices
        .iter_mut()
        .try_for_each(|entry| entry.backfill_missing())?;

    match listing.blockdevices.into_iter().next() {
        Some(first) => Ok(first),
        None => Err(anyhow!("no device output from lsblk for {dev}")),
    }
}
/// Resolves a ZFS dataset (optionally prefixed with `ZFS=`) to a block device.
///
/// The pool name is the part of the dataset before the first `/`.
/// `zpool list -H -v -P <pool>` is then scanned for the first indented line
/// whose first tab-separated field is an absolute path, and that path is
/// passed to [`list_dev`].
///
/// # Errors
/// Fails when the pool name is empty, `zpool` fails, no backing device path
/// is found, or `list_dev` fails.
#[context("Finding block device for ZFS dataset {dataset}")]
fn list_dev_for_zfs_dataset(dataset: &str) -> Result<Device> {
    let dataset = dataset.strip_prefix("ZFS=").unwrap_or(dataset);
    let pool = dataset
        .split('/')
        .next()
        // `split` always yields at least one (possibly empty) item, so the
        // emptiness check is what makes the error below reachable.
        .filter(|pool| !pool.is_empty())
        .ok_or_else(|| anyhow!("Invalid ZFS dataset: {dataset}"))?;
    let output = Command::new("zpool")
        .args(["list", "-H", "-v", "-P", pool])
        .run_get_string()
        .with_context(|| format!("Querying ZFS pool {pool}"))?;

    // Only indented lines are considered; the first one naming an absolute
    // path wins.
    let backing = output
        .lines()
        .filter(|line| line.starts_with('\t') || line.starts_with(' '))
        .map(|line| line.trim_start().split('\t').next().unwrap_or("").trim())
        .find(|candidate| candidate.starts_with('/'));

    match backing {
        Some(dev_path) => list_dev(Utf8Path::new(dev_path)),
        None => anyhow::bail!("Could not find a block device backing ZFS pool {pool}"),
    }
}
/// Describes the block device behind the filesystem mounted at `dir`.
///
/// The filesystem source is passed to [`list_dev`], unless the filesystem
/// type is `zfs` or the source starts with `ZFS=`. In that case the source
/// is resolved as a ZFS dataset.
pub fn list_dev_by_dir(dir: &Dir) -> Result<Device> {
    let fsinfo = bootc_mount::inspect_filesystem_of_dir(dir)?;
    let source = &fsinfo.source;
    let is_zfs = fsinfo.fstype == "zfs" || source.starts_with("ZFS=");
    if is_zfs {
        list_dev_for_zfs_dataset(source)
    } else {
        list_dev(&Utf8PathBuf::from(source))
    }
}
/// A loop device allocated with `losetup`, detached again on `close()` or drop.
pub struct LoopbackDevice {
/// Path of the allocated loop device; `None` once it has been detached.
pub dev: Option<Utf8PathBuf>,
// Helper child process, if one could be spawned; taken and killed on close.
cleanup_handle: Option<LoopbackCleanupHandle>,
}
/// Handle to the spawned `loopback-cleanup-helper` child process.
struct LoopbackCleanupHandle {
// The helper process itself; killed when the device is closed normally.
child: std::process::Child,
}
impl LoopbackDevice {
/// Allocates a loop device for `path` via `losetup --show -P --find`.
///
/// Direct I/O is requested only when the `BOOTC_DIRECT_IO` environment
/// variable is exactly `on`; any other value, or an unset variable, means
/// `off`. A cleanup helper process is also spawned. If that fails, a warning
/// is logged and the device is still returned.
///
/// # Errors
/// Fails if `losetup` fails.
pub fn new(path: &Path) -> Result<Self> {
    let direct_io = if matches!(env::var("BOOTC_DIRECT_IO").as_deref(), Ok("on")) {
        "on"
    } else {
        "off"
    };
    let direct_io_flag = format!("--direct-io={direct_io}");

    let allocated = Command::new("losetup")
        .args(["--show", direct_io_flag.as_str(), "-P", "--find"])
        .arg(path)
        .run_get_string()?;
    let dev = Utf8PathBuf::from(allocated.trim());
    tracing::debug!("Allocated loopback {dev}");

    // The helper is best effort: without it the device still works, it just
    // is not cleaned up if this process is interrupted.
    let cleanup_handle = Self::spawn_cleanup_helper(dev.as_str())
        .map_err(|e| {
            tracing::warn!(
                "Failed to spawn loopback cleanup helper for {}: {}. \
                 Loopback device may not be cleaned up if process is interrupted.",
                dev,
                e
            );
        })
        .ok();

    Ok(Self {
        dev: Some(dev),
        cleanup_handle,
    })
}
pub fn path(&self) -> &Utf8Path {
self.dev.as_deref().unwrap()
}
/// Spawns `<bootc> internals loopback-cleanup-helper --device <device_path>`.
///
/// The child runs with `BOOTC_LOOPBACK_CLEANUP_HELPER=1` in its environment.
/// Its stdin and stdout are redirected to null; stderr is left as inherited.
///
/// # Errors
/// Fails if the bootc executable cannot be located or the spawn fails.
fn spawn_cleanup_helper(device_path: &str) -> Result<LoopbackCleanupHandle> {
    let helper_exe = bootc_utils::reexec::executable_path()
        .context("Failed to locate bootc binary for cleanup helper")?;

    let child = Command::new(helper_exe)
        .args([
            "internals",
            "loopback-cleanup-helper",
            "--device",
            device_path,
        ])
        .env("BOOTC_LOOPBACK_CLEANUP_HELPER", "1")
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .spawn()
        .context("Failed to spawn loopback cleanup helper")?;

    Ok(LoopbackCleanupHandle { child })
}
/// Detaches the loop device and stops the cleanup helper.
///
/// Safe to call more than once: after the first call `dev` is `None` and
/// later calls return `Ok(())` without doing anything.
///
/// # Errors
/// Fails if `losetup -d` fails.
fn impl_close(&mut self) -> Result<()> {
    let Some(dev) = self.dev.take() else {
        tracing::trace!("loopback device already deallocated");
        return Ok(());
    };
    if let Some(mut cleanup_handle) = self.cleanup_handle.take() {
        // Best effort: the helper may already have exited.
        let _ = cleanup_handle.child.kill();
        // Reap the helper. `Child` has no `Drop` cleanup, so without `wait`
        // the killed process stays a zombie until this process exits.
        let _ = cleanup_handle.child.wait();
    }
    Command::new("losetup")
        .args(["-d", dev.as_str()])
        .run_capture_stderr()
}
pub fn close(mut self) -> Result<()> {
self.impl_close()
}
}
/// Detaches the loop device when the value goes out of scope.
impl Drop for LoopbackDevice {
    fn drop(&mut self) {
        // Errors cannot be reported from `drop`; use `close()` to observe them.
        self.impl_close().ok();
    }
}
/// Entry point of the loopback cleanup helper process.
///
/// Waits for SIGTERM, which is also requested as the parent-death signal.
/// It then runs `losetup -d <device_path>` and exits the process with status
/// 0 on success and 1 on failure. Once the signal has been received this
/// function never returns.
///
/// # Errors
/// Returns an error, without waiting, if `BOOTC_LOOPBACK_CLEANUP_HELPER` is
/// not set, the SIGTERM stream cannot be created, or the parent-death signal
/// cannot be configured.
pub async fn run_loopback_cleanup_helper(device_path: &str) -> Result<()> {
    if std::env::var("BOOTC_LOOPBACK_CLEANUP_HELPER").is_err() {
        anyhow::bail!("This function should only be called as a cleanup helper");
    }

    // Install the SIGTERM handler before arming the parent-death signal.
    // Otherwise a parent dying in between would deliver SIGTERM with its
    // default action and kill this helper before it can clean up.
    let mut sigterm = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
        .context("Failed to create signal stream")?;

    rustix::process::set_parent_process_death_signal(Some(rustix::process::Signal::TERM))
        .context("Failed to set parent death signal")?;

    sigterm.recv().await;

    let output = std::process::Command::new("losetup")
        .args(["-d", device_path])
        .output();
    match output {
        Ok(output) if output.status.success() => {
            tracing::info!("Cleaned up leaked loopback device {}", device_path);
            std::process::exit(0);
        }
        Ok(output) => {
            let stderr = String::from_utf8_lossy(&output.stderr);
            tracing::error!(
                "Failed to clean up loopback device {}: {}. Stderr: {}",
                device_path,
                output.status,
                stderr.trim()
            );
            std::process::exit(1);
        }
        Err(e) => {
            tracing::error!(
                "Error executing losetup to clean up loopback device {}: {}",
                device_path,
                e
            );
            std::process::exit(1);
        }
    }
}
pub fn parse_size_mib(mut s: &str) -> Result<u64> {
let suffixes = [
("MiB", 1u64),
("M", 1u64),
("GiB", 1024),
("G", 1024),
("TiB", 1024 * 1024),
("T", 1024 * 1024),
];
let mut mul = 1u64;
for (suffix, imul) in suffixes {
if let Some((sv, rest)) = s.rsplit_once(suffix) {
if !rest.is_empty() {
anyhow::bail!("Trailing text after size: {rest}");
}
s = sv;
mul = imul;
}
}
let v = s.parse::<u64>()?;
Ok(v * mul)
}
#[cfg(test)]
mod test {
use super::*;
/// Bare numbers parse to themselves; suffixed values scale by their unit.
#[test]
fn test_parse_size_mib() {
    let bare = [0u64, 10, 9, 1024].map(|mib| (mib.to_string(), mib));
    let suffixed = [
        ("0M", 0u64),
        ("10M", 10),
        ("10MiB", 10),
        ("1G", 1024),
        ("9G", 9216),
        ("11T", 11 * 1024 * 1024),
    ]
    .map(|(text, mib)| (text.to_string(), mib));

    for (s, expected) in bare.into_iter().chain(suffixed) {
        assert_eq!(parse_size_mib(&s).unwrap(), expected, "Parsing {s}");
    }
}
/// Parses the GPT fixture and checks partition lookup by number, ESP and BIOS boot.
#[test]
fn test_parse_lsblk() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk.json")).unwrap();
    let disk = parsed.blockdevices.into_iter().next().unwrap();
    assert_eq!(disk.partn, None);

    let partitions = disk.children.as_deref().unwrap();
    assert_eq!(partitions.len(), 3);

    let first = &partitions[0];
    assert_eq!(first.partn, Some(1));
    assert_eq!(
        first.parttype.as_deref(),
        Some("21686148-6449-6e6f-744e-656564454649")
    );
    assert_eq!(
        first.partuuid.as_deref(),
        Some("3979e399-262f-4666-aabc-7ab5d3add2f0")
    );

    let second = disk.find_device_by_partno(2).unwrap();
    assert_eq!(second.partn, Some(2));
    assert_eq!(second.parttype.as_deref(), Some(ESP));

    let esp = disk.find_partition_of_esp().unwrap();
    assert_eq!(esp.partn, Some(2));

    let bios_boot = disk.find_partition_of_bios_boot().unwrap();
    assert_eq!(bios_boot.partn, Some(1));
    assert_eq!(bios_boot.parttype.as_deref(), Some(BIOS_BOOT));
}
/// With no partition type data in the fixture, neither ESP nor BIOS boot is found.
#[test]
fn test_parse_lsblk_no_udev() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk-no-udev.json")).unwrap();
    let disk = parsed.blockdevices.into_iter().next().unwrap();
    assert!(disk.pttype.is_none());

    let partitions = disk.children.as_deref().unwrap();
    assert_eq!(partitions.len(), 3);
    assert!(partitions
        .iter()
        .all(|partition| partition.parttype.is_none()));

    assert!(disk.find_partition_of_esp_optional().unwrap().is_none());
    assert!(disk.find_partition_of_bios_boot().is_none());
}
/// Parses the MBR fixture and checks that the first ESP-typed partition wins.
#[test]
fn test_parse_lsblk_mbr() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk-mbr.json")).unwrap();
    let disk = parsed.blockdevices.into_iter().next().unwrap();
    assert_eq!(disk.partn, None);
    assert_eq!(disk.pttype.as_deref(), Some("dos"));

    let partitions = disk.children.as_deref().unwrap();
    assert_eq!(partitions.len(), 3);

    // (partition number, MBR type, partuuid) for each child, in order.
    let expected = [
        (1u32, "0x06", "a1b2c3d4-01"),
        (2, "0x83", "a1b2c3d4-02"),
        (3, "0xef", "a1b2c3d4-03"),
    ];
    for (partition, (partn, parttype, partuuid)) in partitions.iter().zip(expected) {
        assert_eq!(partition.partn, Some(partn));
        assert_eq!(partition.parttype.as_deref(), Some(parttype));
        assert_eq!(partition.partuuid.as_deref(), Some(partuuid));
    }
    assert_eq!(partitions[0].fstype.as_deref(), Some("vfat"));
    assert!(partitions[0].partlabel.is_none());

    let by_number = disk.find_device_by_partno(1).unwrap();
    assert_eq!(by_number.partn, Some(1));

    let esp = disk.find_partition_of_esp().unwrap();
    assert_eq!(esp.partn, Some(1));
}
/// Builds an in-memory `dos`-labelled disk `vda` with one partition per entry
/// in `parttypes`, numbered from 1.
fn make_mbr_disk(parttypes: &[&str]) -> Device {
    // Everything the disk and its partitions have in common.
    let blank = Device {
        name: String::new(),
        serial: None,
        model: None,
        partlabel: None,
        parttype: None,
        partuuid: None,
        partn: None,
        children: None,
        size: 0,
        maj_min: None,
        start: None,
        label: None,
        fstype: None,
        uuid: None,
        path: None,
        pttype: Some("dos".into()),
    };

    let partitions: Vec<Device> = parttypes
        .iter()
        .zip(1u32..)
        .map(|(parttype, number)| Device {
            name: format!("vda{number}"),
            parttype: Some(parttype.to_string()),
            partn: Some(number),
            size: 1048576,
            start: Some(2048),
            ..blank.clone()
        })
        .collect();

    Device {
        name: "vda".into(),
        size: 10737418240,
        path: Some("/dev/vda".into()),
        children: Some(partitions),
        ..blank
    }
}
/// In the VROC fixture the ESP is found on `md126p1` from either top-level device.
#[test]
fn test_parse_lsblk_vroc() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk-vroc.json")).unwrap();
    assert_eq!(parsed.blockdevices.len(), 2);

    for member in parsed.blockdevices.iter() {
        let esp = member.find_partition_of_esp().unwrap();
        assert_eq!(esp.name, "md126p1");
        assert_eq!(esp.partn, Some(1));
        assert_eq!(esp.parttype.as_deref(), Some(ESP));
        assert_eq!(esp.fstype.as_deref(), Some("vfat"));
    }
}
/// In the software-RAID fixture each disk has its own ESP, and `md0` sits on `sda3`.
#[test]
fn test_parse_lsblk_swraid() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk-swraid.json")).unwrap();
    assert_eq!(parsed.blockdevices.len(), 2);

    // Each disk's ESP is its own first partition.
    for (disk, esp_name) in parsed.blockdevices.iter().zip(["sda1", "sdb1"]) {
        let esp = disk.find_partition_of_esp().unwrap();
        assert_eq!(esp.name, esp_name);
        assert_eq!(esp.partn, Some(1));
        assert_eq!(esp.parttype.as_deref(), Some(ESP));
        assert_eq!(esp.fstype.as_deref(), Some("vfat"));
    }

    let sda = &parsed.blockdevices[0];
    let sda3 = sda
        .children
        .as_deref()
        .unwrap()
        .iter()
        .find(|child| child.name == "sda3")
        .unwrap();
    assert_eq!(sda3.fstype.as_deref(), Some("linux_raid_member"));

    let md0 = sda3
        .children
        .as_deref()
        .unwrap()
        .iter()
        .find(|child| child.name == "md0")
        .unwrap();
    assert_eq!(md0.fstype.as_deref(), Some("ext4"));
}
/// MBR types `0x06` and `0xef` are detected as ESP; other types are not.
#[test]
fn test_mbr_esp_detection() {
    // (partition types on the disk, expected ESP partition number if any)
    let cases = [
        (&["0x06"][..], Some(1u32)),
        (&["0x83", "0xef"][..], Some(2)),
        (&["0x83", "0x82"][..], None),
    ];
    for (parttypes, expected) in cases {
        let disk = make_mbr_disk(parttypes);
        match expected {
            Some(partn) => {
                assert_eq!(disk.find_partition_of_esp().unwrap().partn, Some(partn))
            }
            None => assert!(disk.find_partition_of_esp().is_err()),
        }
    }
}
}