Skip to main content

bootc_internal_blockdev/
blockdev.rs

1use std::collections::{HashMap, HashSet};
2use std::env;
3use std::path::Path;
4use std::process::{Command, Stdio};
5use std::sync::OnceLock;
6
7use anyhow::{Context, Result, anyhow};
8use camino::{Utf8Path, Utf8PathBuf};
9use cap_std_ext::cap_std::fs::Dir;
10use fn_error_context::context;
11use serde::Deserialize;
12
13use bootc_utils::CommandRunExt;
14
15/// Check whether the udev database is accessible (cached for the process lifetime).
16///
17/// When running inside a container or sandbox without `/run/udev`
18/// bind-mounted, tools like `lsblk` that depend on the udev database
19/// will return null for fields like `parttype` and `fstype`.
20///
21/// We check for `/run/udev/data` (the actual database directory) rather
22/// than just `/run/udev` because the parent directory can exist as an
23/// empty mount point without the database being populated.
24fn have_udev() -> bool {
25    static HAVE_UDEV: OnceLock<bool> = OnceLock::new();
26    *HAVE_UDEV.get_or_init(|| {
27        let r = Path::new("/run/udev/data").exists();
28        if !r {
29            tracing::debug!(
30                "udev database not available, will use blkid -p for partition metadata"
31            );
32        }
33        r
34    })
35}
36
37/// Probe a device with `blkid -p` and return all discovered properties
38/// as key-value pairs.
39///
40/// This uses the `export` output format (`KEY=value`, one per line) to
41/// retrieve all tags in a single invocation, rather than spawning blkid
42/// once per property.
43///
44/// Returns `Ok(empty map)` if blkid exits with code 2 (no tags found,
45/// e.g. the device is a whole disk). Other non-zero exits are propagated
46/// as errors.
47fn blkid_probe(dev: &str) -> Result<HashMap<String, String>> {
48    let mut cmd = Command::new("blkid");
49    cmd.args(["-p", "-o", "export"]).arg(dev);
50    cmd.log_debug();
51    let output = cmd.output().context("Failed to run blkid")?;
52    if !output.status.success() {
53        // blkid exits with 2 when no tags are found (e.g. whole disk)
54        if output.status.code() == Some(2) {
55            return Ok(HashMap::new());
56        }
57        let stderr = String::from_utf8_lossy(&output.stderr);
58        anyhow::bail!(
59            "blkid -p failed on {dev} (exit status {}): {stderr}",
60            output.status
61        );
62    }
63    let text = String::from_utf8(output.stdout).context("blkid output is not UTF-8")?;
64    let mut props = HashMap::new();
65    for line in text.lines() {
66        if let Some((key, value)) = line.split_once('=') {
67            props.insert(key.to_string(), value.to_string());
68        }
69    }
70    Ok(props)
71}
72
/// MBR partition type IDs that are treated as an EFI System Partition.
///
/// `0x06` is FAT16 (used as the ESP on some MBR systems) and `0xEF` is the
/// explicit EFI System Partition type.
/// Refer to <https://en.wikipedia.org/wiki/Partition_type>
pub const ESP_ID_MBR: &[u8] = &[0x06, 0xEF];

/// Partition type GUID of the EFI System Partition (ESP) for UEFI boot on GPT.
pub const ESP: &str = "c12a7328-f81f-11d2-ba4b-00a0c93ec93b";

/// Partition type GUID of the BIOS boot partition on GPT.
pub const BIOS_BOOT: &str = "21686148-6449-6e6f-744e-656564454649";
84
/// Top-level shape of the JSON document produced by `lsblk -J`, as
/// deserialized by this module.
#[derive(Debug, Deserialize)]
struct DevicesOutput {
    /// Device entries listed under the `blockdevices` key.
    blockdevices: Vec<Device>,
}
89
/// A block device (whole disk or partition) as deserialized from lsblk JSON output.
///
/// Fields map to the lsblk JSON keys of the same name unless renamed via serde.
/// `Option` fields may be null or absent in the output; `start`, `partn`,
/// `parttype` and `pttype` can be filled in afterwards by
/// [`Device::backfill_missing`].
// NOTE(review): the reason for `allow(dead_code)` is not visible in this file — confirm it is still needed.
#[allow(dead_code)]
#[derive(Debug, Clone, serde::Serialize, Deserialize)]
pub struct Device {
    /// Device name; used to build `/dev/<name>` and `/sys/class/block/<name>` paths.
    pub name: String,
    /// Device serial, if reported.
    pub serial: Option<String>,
    /// Device model, if reported.
    pub model: Option<String>,
    /// Partition label, if any.
    pub partlabel: Option<String>,
    /// Partition type: a GPT type GUID, or an MBR hex ID such as `0x06`.
    pub parttype: Option<String>,
    /// Partition UUID, if any.
    pub partuuid: Option<String>,
    /// Partition number (1-indexed). None for whole disk devices.
    pub partn: Option<u32>,
    /// Nested devices: partitions in a normal listing, or parent devices when
    /// the data came from `lsblk --inverse`.
    pub children: Option<Vec<Device>>,
    /// Device size; presumably bytes since lsblk is invoked with `-b` — TODO confirm.
    pub size: u64,
    /// `major:minor` device number, used to locate `/sys/dev/block/<maj:min>/`.
    #[serde(rename = "maj:min")]
    pub maj_min: Option<String>,
    /// Partition start offset (units as reported by lsblk or the sysfs `start` attribute).
    // NOTE this one is not available on older util-linux, and
    // will also not exist for whole blockdevs (as opposed to partitions).
    pub start: Option<u64>,

    // Filesystem-related properties
    /// Filesystem label, if any.
    pub label: Option<String>,
    /// Filesystem type (e.g. "vfat"), if detected.
    pub fstype: Option<String>,
    /// Filesystem UUID, if any.
    pub uuid: Option<String>,
    /// Device node path; absent on older lsblk, see [`Device::path`] for the fallback.
    pub path: Option<String>,
    /// Partition table type (e.g., "gpt", "dos"). Only present on whole disk devices.
    pub pttype: Option<String>,
}
117
118impl Device {
119    // RHEL8's lsblk doesn't have PATH, so we do it
120    pub fn path(&self) -> String {
121        self.path.clone().unwrap_or(format!("/dev/{}", &self.name))
122    }
123
124    /// Alias for path() for compatibility
125    #[allow(dead_code)]
126    pub fn node(&self) -> String {
127        self.path()
128    }
129
130    #[allow(dead_code)]
131    pub fn has_children(&self) -> bool {
132        self.children.as_ref().is_some_and(|v| !v.is_empty())
133    }
134
135    // Check if the device is mpath
136    pub fn is_mpath(&self) -> Result<bool> {
137        let dm_path = Utf8PathBuf::from_path_buf(std::fs::canonicalize(self.path())?)
138            .map_err(|_| anyhow::anyhow!("Non-UTF8 path"))?;
139        let dm_name = dm_path.file_name().unwrap_or("");
140        let uuid_path = Utf8PathBuf::from(format!("/sys/class/block/{dm_name}/dm/uuid"));
141
142        if uuid_path.exists() {
143            let uuid = std::fs::read_to_string(&uuid_path)
144                .with_context(|| format!("Failed to read {uuid_path}"))?;
145            if uuid.trim_start().starts_with("mpath-") {
146                return Ok(true);
147            }
148        }
149        Ok(false)
150    }
151
152    /// Get the numeric partition index of the ESP (e.g. "1", "2").
153    ///
154    /// We read `/sys/class/block/<name>/partition` rather than parsing device
155    /// names because naming conventions vary across disk types (sd, nvme, dm, etc.).
156    /// On multipath devices the sysfs `partition` attribute doesn't exist, so we
157    /// fall back to the `partn` field reported by lsblk, then to parsing the
158    /// partition suffix from the ESP device path relative to the parent device
159    /// path (e.g. parent `/dev/mapper/mpatha`, ESP `/dev/mapper/mpatha2` → `"2"`).
160    pub fn get_esp_partition_number(&self) -> Result<String> {
161        let esp_device = self.find_partition_of_esp()?;
162        let devname = &esp_device.name;
163
164        let partition_path = Utf8PathBuf::from(format!("/sys/class/block/{devname}/partition"));
165        if partition_path.exists() {
166            return std::fs::read_to_string(&partition_path)
167                .with_context(|| format!("Failed to read {partition_path}"));
168        }
169
170        // On multipath the partition attribute is not existing
171        if self.is_mpath()? {
172            if let Some(partn) = esp_device.partn {
173                return Ok(partn.to_string());
174            }
175            // Last resort: strip the parent device path from the ESP device path,
176            // then skip any non-digit separator (e.g. "p") to get the partition number.
177            // For example: parent "/dev/mapper/mpatha", ESP "/dev/mapper/mpatha2" → "2"
178            //              parent "/dev/mapper/mpatha", ESP "/dev/mapper/mpathap2" → "2"
179            let parent_path = self.path();
180            let esp_path = esp_device.path();
181            if let Some(n) = parse_partition_number_from_suffix(&parent_path, &esp_path) {
182                return Ok(n);
183            }
184        }
185        anyhow::bail!("Not supported for {devname}")
186    }
187
188    /// Find BIOS boot partition among children.
189    pub fn find_partition_of_bios_boot(&self) -> Option<&Device> {
190        self.find_partition_of_type(BIOS_BOOT)
191    }
192
193    /// Find all ESP partitions across all root devices backing this device.
194    /// Calls find_all_roots() to discover physical disks, then searches each for an ESP.
195    /// Returns None if no ESPs are found.
196    pub fn find_colocated_esps(&self) -> Result<Option<Vec<Device>>> {
197        let mut esps = Vec::new();
198        for root in &self.find_all_roots()? {
199            if let Some(esp) = root.find_partition_of_esp_optional()? {
200                esps.push(esp.clone());
201            }
202        }
203        Ok((!esps.is_empty()).then_some(esps))
204    }
205
206    /// Find a single ESP partition among all root devices backing this device.
207    ///
208    /// Walks the parent chain to find all backing disks, then looks for ESP
209    /// partitions on each. Returns the first ESP found. This is the common
210    /// case for composefs/UKI boot paths where exactly one ESP is expected.
211    pub fn find_first_colocated_esp(&self) -> Result<Device> {
212        self.find_colocated_esps()?
213            .and_then(|mut v| Some(v.remove(0)))
214            .ok_or_else(|| anyhow!("No ESP partition found among backing devices"))
215    }
216
217    /// Find all BIOS boot partitions across all root devices backing this device.
218    /// Calls find_all_roots() to discover physical disks, then searches each for a BIOS boot partition.
219    /// Returns None if no BIOS boot partitions are found.
220    pub fn find_colocated_bios_boot(&self) -> Result<Option<Vec<Device>>> {
221        let bios_boots: Vec<_> = self
222            .find_all_roots()?
223            .iter()
224            .filter_map(|root| root.find_partition_of_bios_boot())
225            .cloned()
226            .collect();
227        Ok((!bios_boots.is_empty()).then_some(bios_boots))
228    }
229
230    /// Find a child partition by partition type (case-insensitive).
231    pub fn find_partition_of_type(&self, parttype: &str) -> Option<&Device> {
232        self.children.as_ref()?.iter().find(|child| {
233            child
234                .parttype
235                .as_ref()
236                .is_some_and(|pt| pt.eq_ignore_ascii_case(parttype))
237        })
238    }
239
240    /// Find the EFI System Partition (ESP) among children.
241    ///
242    /// For GPT disks, this matches by the ESP partition type GUID.
243    /// For MBR (dos) disks, this matches by the MBR partition type IDs (0x06 or 0xEF).
244    ///
245    /// If no ESP is found among direct children, this recurses into children
246    /// that have their own partition table (e.g. firmware RAID arrays where the
247    /// hierarchy is disk → md array → partitions).
248    ///
249    /// Returns `Ok(None)` when there are no children or no ESP partition
250    /// is present. Returns `Err` only for genuinely unexpected conditions
251    /// (e.g. an unsupported partition table type).
252    pub fn find_partition_of_esp_optional(&self) -> Result<Option<&Device>> {
253        let Some(children) = self.children.as_ref() else {
254            return Ok(None);
255        };
256        let direct = match self.pttype.as_deref() {
257            Some("dos") => children.iter().find(|child| {
258                child
259                    .parttype
260                    .as_ref()
261                    .and_then(|pt| {
262                        let pt = pt.strip_prefix("0x").unwrap_or(pt);
263                        u8::from_str_radix(pt, 16).ok()
264                    })
265                    .is_some_and(|pt| ESP_ID_MBR.contains(&pt))
266            }),
267            // When pttype is None (e.g. older lsblk or partition devices), default
268            // to GPT UUID matching which will simply not match MBR hex types.
269            Some("gpt") | None => self.find_partition_of_type(ESP),
270            Some(other) => return Err(anyhow!("Unsupported partition table type: {other}")),
271        };
272        if direct.is_some() {
273            return Ok(direct);
274        }
275        // Recurse into children that carry their own partition table, such as
276        // firmware RAID arrays (disk → md array → partitions).
277        for child in children {
278            if child.pttype.is_some() {
279                if let Some(esp) = child.find_partition_of_esp_optional()? {
280                    return Ok(Some(esp));
281                }
282            }
283        }
284        Ok(None)
285    }
286
287    /// Find the EFI System Partition (ESP) among children, or error if absent.
288    ///
289    /// This is a convenience wrapper around [`Self::find_partition_of_esp_optional`]
290    /// for callers that require an ESP to be present.
291    pub fn find_partition_of_esp(&self) -> Result<&Device> {
292        self.find_partition_of_esp_optional()?
293            .ok_or_else(|| anyhow!("ESP partition not found on {}", self.path()))
294    }
295
296    /// Find a child partition by partition number (1-indexed).
297    pub fn find_device_by_partno(&self, partno: u32) -> Result<&Device> {
298        self.children
299            .as_ref()
300            .ok_or_else(|| anyhow!("Device has no children"))?
301            .iter()
302            .find(|child| child.partn == Some(partno))
303            .ok_or_else(|| anyhow!("Missing partition for index {partno}"))
304    }
305
306    /// Re-query this device's information from lsblk, updating all fields.
307    /// This is useful after partitioning when the device's children have changed.
308    pub fn refresh(&mut self) -> Result<()> {
309        let path = self.path();
310        let new_device = list_dev(Utf8Path::new(&path))?;
311        *self = new_device;
312        Ok(())
313    }
314
315    /// Read a sysfs property for this device and parse it as the target type.
316    fn read_sysfs_property<T>(&self, property: &str) -> Result<Option<T>>
317    where
318        T: std::str::FromStr,
319        T::Err: std::error::Error + Send + Sync + 'static,
320    {
321        let Some(majmin) = self.maj_min.as_deref() else {
322            return Ok(None);
323        };
324        let sysfs_path = format!("/sys/dev/block/{majmin}/{property}");
325        if !Utf8Path::new(&sysfs_path).try_exists()? {
326            return Ok(None);
327        }
328        let value = std::fs::read_to_string(&sysfs_path)
329            .with_context(|| format!("Reading {sysfs_path}"))?;
330        let parsed = value
331            .trim()
332            .parse()
333            .with_context(|| format!("Parsing sysfs {property} property"))?;
334        tracing::debug!("backfilled {property} to {value}");
335        Ok(Some(parsed))
336    }
337
338    /// Backfill properties that may be missing from lsblk output.
339    ///
340    /// Older versions of util-linux may lack `start` and `partn`; these are
341    /// backfilled from sysfs. When the udev database is unavailable (e.g.
342    /// inside a container sandbox), `parttype` and `pttype` are backfilled
343    /// via `blkid -p` which reads directly from the disk.
344    pub fn backfill_missing(&mut self) -> Result<()> {
345        // The "start" parameter was only added in a version of util-linux that's only
346        // in Fedora 40 as of this writing.
347        if self.start.is_none() {
348            self.start = self.read_sysfs_property("start")?;
349        }
350        // The "partn" column was added in util-linux 2.39, which is newer than
351        // what CentOS 9 / RHEL 9 ship (2.37). Note: sysfs uses "partition" not "partn".
352        if self.partn.is_none() {
353            self.partn = self.read_sysfs_property("partition")?;
354        }
355        // When udev is unavailable, lsblk can't populate parttype/pttype from
356        // the udev database. Fall back to blkid -p which probes the disk
357        // directly. See https://github.com/osbuild/osbuild/pull/2428
358        if !have_udev() && (self.parttype.is_none() || self.pttype.is_none()) {
359            let props = blkid_probe(&self.path())?;
360            if self.parttype.is_none() {
361                self.parttype = props.get("PART_ENTRY_TYPE").cloned();
362            }
363            if self.pttype.is_none() {
364                self.pttype = props.get("PTTYPE").cloned();
365            }
366        }
367        // Recurse to child devices
368        for child in self.children.iter_mut().flatten() {
369            child.backfill_missing()?;
370        }
371        Ok(())
372    }
373
374    /// Query parent devices via `lsblk --inverse`.
375    ///
376    /// Returns `Ok(None)` if this device is already a root device (no parents).
377    /// In the returned `Vec<Device>`, each device's `children` field contains
378    /// *its own* parents (grandparents, etc.), forming the full chain to the
379    /// root device(s). A device can have multiple parents (e.g. RAID, LVM).
380    pub fn list_parents(&self) -> Result<Option<Vec<Device>>> {
381        let path = self.path();
382        let output: DevicesOutput = Command::new("lsblk")
383            .args(["-J", "-b", "-O", "--inverse"])
384            .arg(&path)
385            .log_debug()
386            .run_and_parse_json()?;
387
388        let device = output
389            .blockdevices
390            .into_iter()
391            .next()
392            .ok_or_else(|| anyhow!("no device output from lsblk --inverse for {path}"))?;
393
394        match device.children {
395            Some(mut children) if !children.is_empty() => {
396                for child in &mut children {
397                    child.backfill_missing()?;
398                }
399                Ok(Some(children))
400            }
401            _ => Ok(None),
402        }
403    }
404
405    /// Walk the parent chain to find all root (whole disk) devices,
406    /// and fail if more than one root is found.
407    ///
408    /// This is a convenience wrapper around `find_all_roots` for callers
409    /// that expect exactly one backing device (e.g. non-RAID setups).
410    pub fn require_single_root(&self) -> Result<Device> {
411        let mut roots = self.find_all_roots()?;
412        match roots.len() {
413            1 => Ok(roots.remove(0)),
414            n => anyhow::bail!(
415                "Expected a single root device for {}, but found {n}",
416                self.path()
417            ),
418        }
419    }
420
421    /// Walk the parent chain to find all root (whole disk) devices.
422    ///
423    /// Returns all root devices with their children (partitions) populated.
424    /// This handles devices backed by multiple parents (e.g. RAID arrays)
425    /// by following all branches of the parent tree.
426    /// If this device is already a root device, returns a single-element list.
427    pub fn find_all_roots(&self) -> Result<Vec<Device>> {
428        let Some(parents) = self.list_parents()? else {
429            // Already a root device; re-query to ensure children are populated
430            return Ok(vec![list_dev(Utf8Path::new(&self.path()))?]);
431        };
432
433        let mut roots = Vec::new();
434        let mut seen = HashSet::new();
435        let mut queue = parents;
436        while let Some(mut device) = queue.pop() {
437            match device.children.take() {
438                Some(grandparents) if !grandparents.is_empty() => {
439                    queue.extend(grandparents);
440                }
441                _ => {
442                    // Deduplicate: in complex topologies (e.g. multipath)
443                    // multiple branches can converge on the same physical disk.
444                    let name = device.name.clone();
445                    if seen.insert(name) {
446                        // Found a new root; re-query to populate its actual children
447                        roots.push(list_dev(Utf8Path::new(&device.path()))?);
448                    }
449                }
450            }
451        }
452        Ok(roots)
453    }
454}
455
456#[context("Listing device {dev}")]
457pub fn list_dev(dev: &Utf8Path) -> Result<Device> {
458    let mut devs: DevicesOutput = Command::new("lsblk")
459        .args(["-J", "-b", "-O"])
460        .arg(dev)
461        .log_debug()
462        .run_and_parse_json()?;
463    for dev in devs.blockdevices.iter_mut() {
464        dev.backfill_missing()?;
465    }
466    devs.blockdevices
467        .into_iter()
468        .next()
469        .ok_or_else(|| anyhow!("no device output from lsblk for {dev}"))
470}
471
472#[context("Finding block device for ZFS dataset {dataset}")]
473fn list_dev_for_zfs_dataset(dataset: &str) -> Result<Device> {
474    let dataset = dataset.strip_prefix("ZFS=").unwrap_or(dataset);
475    let pool = dataset
476        .split('/')
477        .next()
478        .ok_or_else(|| anyhow!("Invalid ZFS dataset: {dataset}"))?;
479
480    let output = Command::new("zpool")
481        .args(["list", "-H", "-v", "-P", pool])
482        .run_get_string()
483        .with_context(|| format!("Querying ZFS pool {pool}"))?;
484
485    for line in output.lines() {
486        if line.starts_with('\t') || line.starts_with(' ') {
487            let dev_path = line.trim_start().split('\t').next().unwrap_or("").trim();
488            if dev_path.starts_with('/') {
489                return list_dev(Utf8Path::new(dev_path));
490            }
491        }
492    }
493
494    anyhow::bail!("Could not find a block device backing ZFS pool {pool}")
495}
496
497/// List the device containing the filesystem mounted at the given directory.
498pub fn list_dev_by_dir(dir: &Dir) -> Result<Device> {
499    let fsinfo = bootc_mount::inspect_filesystem_of_dir(dir)?;
500    let source = &fsinfo.source;
501    if fsinfo.fstype == "zfs" || source.starts_with("ZFS=") {
502        return list_dev_for_zfs_dataset(source);
503    }
504    list_dev(&Utf8PathBuf::from(source))
505}
506
/// A loopback block device allocated with `losetup`, detached again on
/// `close` or drop.
pub struct LoopbackDevice {
    /// Path of the allocated loop device; `None` once it has been detached.
    pub dev: Option<Utf8PathBuf>,
    // Handle to the cleanup helper process; `None` if spawning it failed or
    // after the device has been closed.
    cleanup_handle: Option<LoopbackCleanupHandle>,
}
512
/// Handle to manage the cleanup helper process for loopback devices.
///
/// The helper is a separate process spawned by
/// `LoopbackDevice::spawn_cleanup_helper`.
struct LoopbackCleanupHandle {
    /// Child process handle of the spawned helper.
    child: std::process::Child,
}
518
519impl LoopbackDevice {
520    // Create a new loopback block device targeting the provided file path.
521    pub fn new(path: &Path) -> Result<Self> {
522        let direct_io = match env::var("BOOTC_DIRECT_IO") {
523            Ok(val) => {
524                if val == "on" {
525                    "on"
526                } else {
527                    "off"
528                }
529            }
530            Err(_e) => "off",
531        };
532
533        let dev = Command::new("losetup")
534            .args([
535                "--show",
536                format!("--direct-io={direct_io}").as_str(),
537                "-P",
538                "--find",
539            ])
540            .arg(path)
541            .run_get_string()?;
542        let dev = Utf8PathBuf::from(dev.trim());
543        tracing::debug!("Allocated loopback {dev}");
544
545        // Try to spawn cleanup helper, but don't fail if it doesn't work
546        let cleanup_handle = match Self::spawn_cleanup_helper(dev.as_str()) {
547            Ok(handle) => Some(handle),
548            Err(e) => {
549                tracing::warn!(
550                    "Failed to spawn loopback cleanup helper for {}: {}. \
551                     Loopback device may not be cleaned up if process is interrupted.",
552                    dev,
553                    e
554                );
555                None
556            }
557        };
558
559        Ok(Self {
560            dev: Some(dev),
561            cleanup_handle,
562        })
563    }
564
565    // Access the path to the loopback block device.
566    pub fn path(&self) -> &Utf8Path {
567        // SAFETY: The option cannot be destructured until we are dropped
568        self.dev.as_deref().unwrap()
569    }
570
571    /// Spawn a cleanup helper process that will clean up the loopback device
572    /// if the parent process dies unexpectedly
573    fn spawn_cleanup_helper(device_path: &str) -> Result<LoopbackCleanupHandle> {
574        // Try multiple strategies to find the bootc binary
575        let bootc_path = bootc_utils::reexec::executable_path()
576            .context("Failed to locate bootc binary for cleanup helper")?;
577
578        // Create the helper process
579        let mut cmd = Command::new(bootc_path);
580        cmd.args([
581            "internals",
582            "loopback-cleanup-helper",
583            "--device",
584            device_path,
585        ]);
586
587        // Set environment variable to indicate this is a cleanup helper
588        cmd.env("BOOTC_LOOPBACK_CLEANUP_HELPER", "1");
589
590        // Set up stdio to redirect to /dev/null
591        cmd.stdin(Stdio::null());
592        cmd.stdout(Stdio::null());
593        // Don't redirect stderr so we can see error messages
594
595        // Spawn the process
596        let child = cmd
597            .spawn()
598            .context("Failed to spawn loopback cleanup helper")?;
599
600        Ok(LoopbackCleanupHandle { child })
601    }
602
603    // Shared backend for our `close` and `drop` implementations.
604    fn impl_close(&mut self) -> Result<()> {
605        // SAFETY: This is the only place we take the option
606        let Some(dev) = self.dev.take() else {
607            tracing::trace!("loopback device already deallocated");
608            return Ok(());
609        };
610
611        // Kill the cleanup helper since we're cleaning up normally
612        if let Some(mut cleanup_handle) = self.cleanup_handle.take() {
613            // Send SIGTERM to the child process and let it do the cleanup
614            let _ = cleanup_handle.child.kill();
615        }
616
617        Command::new("losetup")
618            .args(["-d", dev.as_str()])
619            .run_capture_stderr()
620    }
621
622    /// Consume this device, unmounting it.
623    pub fn close(mut self) -> Result<()> {
624        self.impl_close()
625    }
626}
627
628impl Drop for LoopbackDevice {
629    fn drop(&mut self) {
630        // Best effort to unmount if we're dropped without invoking `close`
631        let _ = self.impl_close();
632    }
633}
634
635/// Main function for the loopback cleanup helper process
636/// This function does not return - it either exits normally or via signal
637pub async fn run_loopback_cleanup_helper(device_path: &str) -> Result<()> {
638    // Check if we're running as a cleanup helper
639    if std::env::var("BOOTC_LOOPBACK_CLEANUP_HELPER").is_err() {
640        anyhow::bail!("This function should only be called as a cleanup helper");
641    }
642
643    // Set up death signal notification - we want to be notified when parent dies
644    rustix::process::set_parent_process_death_signal(Some(rustix::process::Signal::TERM))
645        .context("Failed to set parent death signal")?;
646
647    // Wait for SIGTERM (either from parent death or normal cleanup)
648    tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
649        .expect("Failed to create signal stream")
650        .recv()
651        .await;
652
653    // Clean up the loopback device
654    let output = std::process::Command::new("losetup")
655        .args(["-d", device_path])
656        .output();
657
658    match output {
659        Ok(output) if output.status.success() => {
660            // Log to systemd journal instead of stderr
661            tracing::info!("Cleaned up leaked loopback device {}", device_path);
662            std::process::exit(0);
663        }
664        Ok(output) => {
665            let stderr = String::from_utf8_lossy(&output.stderr);
666            tracing::error!(
667                "Failed to clean up loopback device {}: {}. Stderr: {}",
668                device_path,
669                output.status,
670                stderr.trim()
671            );
672            std::process::exit(1);
673        }
674        Err(e) => {
675            tracing::error!(
676                "Error executing losetup to clean up loopback device {}: {}",
677                device_path,
678                e
679            );
680            std::process::exit(1);
681        }
682    }
683}
684
685/// Parse a string into mibibytes
686pub fn parse_size_mib(mut s: &str) -> Result<u64> {
687    let suffixes = [
688        ("MiB", 1u64),
689        ("M", 1u64),
690        ("GiB", 1024),
691        ("G", 1024),
692        ("TiB", 1024 * 1024),
693        ("T", 1024 * 1024),
694    ];
695    let mut mul = 1u64;
696    for (suffix, imul) in suffixes {
697        if let Some((sv, rest)) = s.rsplit_once(suffix) {
698            if !rest.is_empty() {
699                anyhow::bail!("Trailing text after size: {rest}");
700            }
701            s = sv;
702            mul = imul;
703        }
704    }
705    let v = s.parse::<u64>()?;
706    Ok(v * mul)
707}
708
/// Extract a partition number by stripping the parent device path from the
/// ESP partition device path, then skipping any non-digit separator characters.
///
/// Multipath partition devices are named by appending a partition suffix to
/// the parent device path. The suffix may include a separator like "p" before
/// the digits:
///   - `/dev/mapper/mpatha`  + `2`  → `/dev/mapper/mpatha2`
///   - `/dev/mapper/mpatha`  + `p2` → `/dev/mapper/mpathap2`
///
/// Returns `None` if the ESP path doesn't start with the parent path, or if
/// what follows the separator is not a non-empty run of ASCII digits reaching
/// the end of the path.
fn parse_partition_number_from_suffix(parent_path: &str, esp_path: &str) -> Option<String> {
    let suffix = esp_path.strip_prefix(parent_path)?;
    let digits = suffix.trim_start_matches(|c: char| !c.is_ascii_digit());
    // Require the remainder to be purely numeric so that something like "2x"
    // is never reported as a partition number.
    let is_number = !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit());
    is_number.then(|| digits.to_owned())
}
728
729#[cfg(test)]
730mod test {
731    use super::*;
732
733    #[test]
734    fn test_parse_size_mib() {
735        let ident_cases = [0, 10, 9, 1024].into_iter().map(|k| (k.to_string(), k));
736        let cases = [
737            ("0M", 0),
738            ("10M", 10),
739            ("10MiB", 10),
740            ("1G", 1024),
741            ("9G", 9216),
742            ("11T", 11 * 1024 * 1024),
743        ]
744        .into_iter()
745        .map(|(k, v)| (k.to_string(), v));
746        for (s, v) in ident_cases.chain(cases) {
747            assert_eq!(parse_size_mib(&s).unwrap(), v as u64, "Parsing {s}");
748        }
749    }
750
751    #[test]
752    fn test_parse_lsblk() {
753        let fixture = include_str!("../tests/fixtures/lsblk.json");
754        let devs: DevicesOutput = serde_json::from_str(fixture).unwrap();
755        let dev = devs.blockdevices.into_iter().next().unwrap();
756        // The parent device has no partition number
757        assert_eq!(dev.partn, None);
758        let children = dev.children.as_deref().unwrap();
759        assert_eq!(children.len(), 3);
760        let first_child = &children[0];
761        assert_eq!(first_child.partn, Some(1));
762        assert_eq!(
763            first_child.parttype.as_deref().unwrap(),
764            "21686148-6449-6e6f-744e-656564454649"
765        );
766        assert_eq!(
767            first_child.partuuid.as_deref().unwrap(),
768            "3979e399-262f-4666-aabc-7ab5d3add2f0"
769        );
770        // Verify find_device_by_partno works
771        let part2 = dev.find_device_by_partno(2).unwrap();
772        assert_eq!(part2.partn, Some(2));
773        assert_eq!(part2.parttype.as_deref().unwrap(), ESP);
774        // Verify find_partition_of_esp works
775        let esp = dev.find_partition_of_esp().unwrap();
776        assert_eq!(esp.partn, Some(2));
777        // Verify find_partition_of_bios_boot works (vda1 is BIOS-BOOT)
778        let bios = dev.find_partition_of_bios_boot().unwrap();
779        assert_eq!(bios.partn, Some(1));
780        assert_eq!(bios.parttype.as_deref().unwrap(), BIOS_BOOT);
781    }
782
/// Check the behaviour when `lsblk` output was produced without the udev
/// database: the partition type fields deserialize as `None` and partition
/// discovery finds nothing. This mirrors bootc running inside a sandbox
/// (such as osbuild's bwrap) that has no /run/udev.
#[test]
fn test_parse_lsblk_no_udev() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk-no-udev.json")).unwrap();
    let disk = parsed.blockdevices.into_iter().next().unwrap();

    // The partition table type of the disk is missing...
    assert!(disk.pttype.is_none());

    // ...and so is the partition type of every one of its three children.
    let parts = disk.children.as_deref().unwrap();
    assert_eq!(parts.len(), 3);
    for part in parts {
        assert!(part.parttype.is_none());
    }

    // With no parttype to match on, neither the ESP nor the BIOS boot
    // partition can be located.
    assert!(disk.find_partition_of_esp_optional().unwrap().is_none());
    assert!(disk.find_partition_of_bios_boot().is_none());
}
802
/// Parse the `lsblk-mbr.json` fixture (a disk with a `dos` partition table)
/// and check both the deserialized partition metadata and the lookup
/// helpers on an MBR layout.
#[test]
fn test_parse_lsblk_mbr() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk-mbr.json")).unwrap();
    let disk = parsed.blockdevices.into_iter().next().unwrap();

    // The parent disk has no partition number and reports an MBR table.
    assert_eq!(disk.partn, None);
    assert_eq!(disk.pttype.as_deref(), Some("dos"));

    let parts = disk.children.as_deref().unwrap();
    assert_eq!(parts.len(), 3);

    // Expected (partition number, MBR type, partuuid) per child, in order:
    // FAT16 boot (0x06, an ESP type), Linux root (0x83), EFI System (0xef).
    let expected = [
        (1, "0x06", "a1b2c3d4-01"),
        (2, "0x83", "a1b2c3d4-02"),
        (3, "0xef", "a1b2c3d4-03"),
    ];
    for (part, (num, parttype, partuuid)) in parts.iter().zip(expected) {
        assert_eq!(part.partn, Some(num));
        assert_eq!(part.parttype.as_deref(), Some(parttype));
        assert_eq!(part.partuuid.as_deref(), Some(partuuid));
    }

    // Extra checks on the first partition: it is vfat and, being an MBR
    // partition, has no partlabel.
    let boot = &parts[0];
    assert_eq!(boot.fstype.as_deref(), Some("vfat"));
    assert!(boot.partlabel.is_none());

    // Lookup by partition number works on MBR disks too.
    let by_number = disk.find_device_by_partno(1).unwrap();
    assert_eq!(by_number.partn, Some(1));

    // The ESP lookup returns the first partition with a matching type,
    // which is the 0x06 partition 1 rather than the 0xef partition 3.
    let esp = disk.find_partition_of_esp().unwrap();
    assert_eq!(esp.partn, Some(1));
}
838
839    /// Helper to construct a minimal MBR disk Device with given child partition types.
840    fn make_mbr_disk(parttypes: &[&str]) -> Device {
841        Device {
842            name: "vda".into(),
843            serial: None,
844            model: None,
845            partlabel: None,
846            parttype: None,
847            partuuid: None,
848            partn: None,
849            size: 10737418240,
850            maj_min: None,
851            start: None,
852            label: None,
853            fstype: None,
854            uuid: None,
855            path: Some("/dev/vda".into()),
856            pttype: Some("dos".into()),
857            children: Some(
858                parttypes
859                    .iter()
860                    .enumerate()
861                    .map(|(i, pt)| Device {
862                        name: format!("vda{}", i + 1),
863                        serial: None,
864                        model: None,
865                        partlabel: None,
866                        parttype: Some(pt.to_string()),
867                        partuuid: None,
868                        partn: Some(i as u32 + 1),
869                        size: 1048576,
870                        maj_min: None,
871                        start: Some(2048),
872                        label: None,
873                        fstype: None,
874                        uuid: None,
875                        path: None,
876                        pttype: Some("dos".into()),
877                        children: None,
878                    })
879                    .collect(),
880            ),
881        }
882    }
883
/// Parse the `lsblk-vroc.json` fixture and check that the ESP is found from
/// each of the two top-level devices even though it sits on the `md126`
/// RAID array rather than directly on the disk.
#[test]
fn test_parse_lsblk_vroc() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk-vroc.json")).unwrap();
    assert_eq!(parsed.blockdevices.len(), 2);

    // The ESP (md126p1) is not a direct child of either NVMe disk, so
    // find_partition_of_esp has to recurse through md126 to reach it.
    for disk in parsed.blockdevices.iter() {
        let found = disk.find_partition_of_esp().unwrap();
        assert_eq!(found.name, "md126p1");
        assert_eq!(found.partn, Some(1));
        assert_eq!(found.parttype.as_deref().unwrap(), ESP);
        assert_eq!(found.fstype.as_deref(), Some("vfat"));
    }
}
901
/// Parse the `lsblk-swraid.json` fixture (software RAID via mdadm) and check
/// ESP discovery on each member disk as well as the nesting of the `md0`
/// array below the RAID member partition.
#[test]
fn test_parse_lsblk_swraid() {
    let parsed: DevicesOutput =
        serde_json::from_str(include_str!("../tests/fixtures/lsblk-swraid.json")).unwrap();
    let disks = &parsed.blockdevices;
    assert_eq!(disks.len(), 2);

    // Each disk is partitioned on its own, with its own GPT table and ESP;
    // only the root partition (sda3/sdb3) is a linux_raid_member that gets
    // assembled into md0. The ESP is therefore a direct child of each disk
    // and no recursion through an md array is needed to find it.
    for (disk, esp_name) in disks.iter().zip(["sda1", "sdb1"]) {
        let found = disk.find_partition_of_esp().unwrap();
        assert_eq!(found.name, esp_name);
        assert_eq!(found.partn, Some(1));
        assert_eq!(found.parttype.as_deref().unwrap(), ESP);
        assert_eq!(found.fstype.as_deref(), Some("vfat"));
    }

    // The md0 array shows up as a child of the RAID member partition on
    // the first disk.
    let raid_member = disks[0]
        .children
        .as_deref()
        .unwrap()
        .iter()
        .find(|part| part.name == "sda3")
        .unwrap();
    assert_eq!(raid_member.fstype.as_deref(), Some("linux_raid_member"));

    let array = raid_member
        .children
        .as_deref()
        .unwrap()
        .iter()
        .find(|child| child.name == "md0")
        .unwrap();
    assert_eq!(array.fstype.as_deref(), Some("ext4"));
}
946
/// Check which MBR partition type codes `find_partition_of_esp` accepts as
/// an ESP, using synthetic disks from `make_mbr_disk`.
#[test]
fn test_mbr_esp_detection() {
    // Each case lists the partition types on the disk and the partition
    // number expected to be reported as the ESP (`None` means the lookup
    // must fail).
    let cases: [(&[&str], Option<u32>); 3] = [
        // 0x06 (FAT16) is recognized as ESP
        (&["0x06"], Some(1)),
        // 0xef (EFI System Partition) is recognized as ESP
        (&["0x83", "0xef"], Some(2)),
        // No ESP types present: 0x83 (Linux) and 0x82 (swap)
        (&["0x83", "0x82"], None),
    ];

    for (parttypes, expected) in cases {
        let disk = make_mbr_disk(parttypes);
        match expected {
            Some(number) => {
                assert_eq!(disk.find_partition_of_esp().unwrap().partn, Some(number));
            }
            None => assert!(disk.find_partition_of_esp().is_err()),
        }
    }
}
961
/// Table-driven check of `parse_partition_number_from_suffix`: given a
/// parent device path and a partition path, it should yield the trailing
/// partition number, or `None` when no number can be derived.
#[test]
fn test_parse_partition_number_from_suffix() {
    // (parent path, partition path, expected partition number)
    let cases: [(&str, &str, Option<&str>); 7] = [
        // Short alias like /dev/mapper/mpatha → /dev/mapper/mpatha2
        ("/dev/mapper/mpatha", "/dev/mapper/mpatha2", Some("2")),
        // With a "p" separator: /dev/mapper/mpatha → /dev/mapper/mpathap2
        ("/dev/mapper/mpatha", "/dev/mapper/mpathap2", Some("2")),
        // WWID-style name with "part" separator
        (
            "/dev/mapper/3600508b4001",
            "/dev/mapper/3600508b4001-part1",
            Some("1"),
        ),
        // Multi-digit partition number
        ("/dev/mapper/mpatha", "/dev/mapper/mpatha12", Some("12")),
        // ESP path doesn't share the parent prefix → None
        ("/dev/mapper/mpatha", "/dev/sda1", None),
        // No digits in suffix → None
        ("/dev/mapper/mpatha", "/dev/mapper/mpathap", None),
        // Identical paths (no suffix at all) → None
        ("/dev/mapper/mpatha", "/dev/mapper/mpatha", None),
    ];

    for (parent, partition, expected) in cases {
        assert_eq!(
            parse_partition_number_from_suffix(parent, partition),
            expected.map(Into::into),
            "parent {parent}, partition {partition}"
        );
    }
}
1003}