libcoreinst/
blockdev.rs

1// Copyright 2019 CoreOS, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use anyhow::{anyhow, bail, Context, Result};
16use gptman::{GPTPartitionEntry, GPT};
17use lazy_static::lazy_static;
18use nix::sys::stat::{major, minor};
19use nix::{errno::Errno, mount, sched};
20use regex::Regex;
21use std::collections::{HashMap, HashSet};
22use std::fs::{
23    canonicalize, metadata, read_dir, read_to_string, remove_dir, symlink_metadata, File,
24    OpenOptions,
25};
26use std::io::{Read, Seek, SeekFrom, Write};
27use std::num::{NonZeroU32, NonZeroU64};
28use std::os::linux::fs::MetadataExt;
29use std::os::raw::c_int;
30use std::os::unix::fs::FileTypeExt;
31use std::os::unix::io::AsRawFd;
32use std::path::{Path, PathBuf};
33use std::process::Command;
34use std::thread::sleep;
35use std::time::Duration;
36use uuid::Uuid;
37
38use crate::cmdline::PartitionFilter;
39use crate::util::*;
40
41use crate::{runcmd, runcmd_output};
42
/// Handle for a disk device, referenced by the canonical path of its
/// device node.
#[derive(Debug)]
pub struct Disk {
    /// Canonicalized device path, stored as a UTF-8 string.
    path: String,
}
47
48impl Disk {
49    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
50        let path = path.as_ref();
51        let canon_path = path
52            .canonicalize()
53            .with_context(|| format!("canonicalizing {}", path.display()))?;
54
55        let canon_path = canon_path
56            .to_str()
57            .with_context(|| {
58                format!(
59                    "path {} canonicalized from {} is not UTF-8",
60                    canon_path.display(),
61                    path.display()
62                )
63            })?
64            .to_string();
65
66        Ok(Disk { path: canon_path })
67    }
68
69    pub fn mount_partition_by_label(&self, label: &str, flags: mount::MsFlags) -> Result<Mount> {
70        // get partition list
71        let partitions = self.get_partitions()?;
72        if partitions.is_empty() {
73            bail!("couldn't find any partitions on {}", self.path);
74        }
75
76        // find the partition with the matching label
77        let matching_partitions = partitions
78            .iter()
79            .filter(|d| d.label.as_ref().unwrap_or(&"".to_string()) == label)
80            .collect::<Vec<&Partition>>();
81        let part = match matching_partitions.len() {
82            0 => bail!("couldn't find {} device for {}", label, self.path),
83            1 => matching_partitions[0],
84            _ => bail!(
85                "found multiple devices on {} with label \"{}\"",
86                self.path,
87                label
88            ),
89        };
90
91        // mount it
92        match &part.fstype {
93            Some(fstype) => Mount::try_mount(&part.path, fstype, flags),
94            None => bail!(
95                "couldn't get filesystem type of {} device for {}",
96                label,
97                self.path
98            ),
99        }
100    }
101
102    fn get_partitions(&self) -> Result<Vec<Partition>> {
103        // walk each device in the output
104        let mut result: Vec<Partition> = Vec::new();
105        for devinfo in lsblk(Path::new(&self.path), true)? {
106            if let Some(name) = devinfo.get("NAME") {
107                // Only return partitions.  Skip the whole-disk device, as well
108                // as holders like LVM or RAID devices using one of the partitions.
109                if devinfo.get("TYPE").map(|s| s.as_str()) != Some("part") {
110                    continue;
111                }
112                // only trust lsblk output for the following fields if we have udev
113                if have_udev() {
114                    let (mountpoint, swap) = match devinfo.get("MOUNTPOINT") {
115                        Some(mp) if mp == "[SWAP]" => (None, true),
116                        Some(mp) => (Some(mp.to_string()), false),
117                        None => (None, false),
118                    };
119                    result.push(Partition {
120                        path: name.to_owned(),
121                        label: devinfo.get("LABEL").map(<_>::to_string),
122                        fstype: devinfo.get("FSTYPE").map(<_>::to_string),
123                        parent: self.path.to_owned(),
124                        mountpoint,
125                        swap,
126                    });
127                } else {
128                    let devinfo = blkid_single(Path::new(name))?;
129                    // note TYPE here: blkid uses TYPE instead of FSTYPE
130                    let fstype = devinfo.get("TYPE").map(<_>::to_string);
131                    result.push(Partition {
132                        path: name.to_owned(),
133                        label: devinfo.get("LABEL").map(<_>::to_string),
134                        fstype: fstype.to_owned(),
135                        parent: self.path.to_owned(),
136                        mountpoint: None,
137                        swap: fstype.is_some_and(|s| s == "swap"),
138                    });
139                }
140            }
141        }
142        Ok(result)
143    }
144
145    /// Return an empty list if we have exclusive access to the device, or
146    /// a list of partitions preventing us from gaining exclusive access.
147    pub fn get_busy_partitions(self) -> Result<Vec<Partition>> {
148        // Try rereading the partition table.  This is the most complete
149        // check, but it only works on partitionable devices.
150        let rereadpt_result = {
151            let mut f = OpenOptions::new()
152                .write(true)
153                .open(&self.path)
154                .with_context(|| format!("opening {}", &self.path))?;
155            reread_partition_table(&mut f, false).map(|_| Vec::new())
156        };
157        if rereadpt_result.is_ok() {
158            return rereadpt_result;
159        }
160
161        // Walk partitions, record the ones that are reported in use,
162        // and return the list if any
163        let mut busy: Vec<Partition> = Vec::new();
164        for d in self.get_partitions()? {
165            if d.mountpoint.is_some() || d.swap || !d.get_holders()?.is_empty() {
166                busy.push(d)
167            }
168        }
169        if !busy.is_empty() {
170            return Ok(busy);
171        }
172
173        // Our investigation found nothing.  If the device is expected to be
174        // partitionable but reread failed, we evidently missed something,
175        // so error out for safety
176        if !self.is_dm_device() {
177            return rereadpt_result;
178        }
179
180        Ok(Vec::new())
181    }
182
183    /// Get a handle to the set of device nodes for individual partitions
184    /// of the device.
185    pub fn get_partition_table(&self) -> Result<Box<dyn PartTable>> {
186        if self.is_dm_device() {
187            Ok(Box::new(PartTableKpartx::new(&self.path)?))
188        } else {
189            Ok(Box::new(PartTableKernel::new(&self.path)?))
190        }
191    }
192
193    pub fn is_dm_device(&self) -> bool {
194        self.path.starts_with("/dev/dm-")
195    }
196
197    pub fn is_luks_integrity(&self) -> Result<bool> {
198        if !self.is_dm_device() {
199            return Ok(false);
200        }
201        let dminfo = runcmd_output!(
202            "dmsetup",
203            "info",
204            "--columns",
205            "--noheadings",
206            "-o",
207            "uuid",
208            &self.path
209        )
210        .with_context(|| format!("checking if device {} is type LUKS integrity", self.path))?;
211
212        let uuid = dminfo.trim();
213        // since cryptsetup-2.8.0 SUBDEV is used
214        Ok(uuid.starts_with("CRYPT-INTEGRITY-") || uuid.starts_with("CRYPT-SUBDEV-"))
215    }
216}
217
218/// A handle to the set of device nodes for individual partitions of a
219/// device.  Must be held as long as the device nodes are needed; they might
220/// be removed upon drop.
221pub trait PartTable {
222    /// Update device nodes for the current state of the partition table
223    fn reread(&mut self) -> Result<()>;
224}
225
226/// Device nodes for partitionable kernel devices, managed by the kernel.
227#[derive(Debug)]
228pub struct PartTableKernel {
229    file: File,
230}
231
232impl PartTableKernel {
233    fn new(path: &str) -> Result<Self> {
234        let file = OpenOptions::new()
235            .write(true)
236            .open(path)
237            .with_context(|| format!("opening {path}"))?;
238        Ok(Self { file })
239    }
240}
241
242impl PartTable for PartTableKernel {
243    fn reread(&mut self) -> Result<()> {
244        reread_partition_table(&mut self.file, true)?;
245        udev_settle()
246    }
247}
248
/// Device nodes for non-partitionable kernel devices.  They are managed by
/// running kpartx, which parses the partition table and creates one
/// device-mapper device per partition.
#[derive(Debug)]
pub struct PartTableKpartx {
    /// Path of the device handed to kpartx.
    path: String,
    /// Whether the partition devices are deleted (`kpartx -d`) when this
    /// handle is dropped.
    need_teardown: bool,
}
257
258impl PartTableKpartx {
259    fn new(path: &str) -> Result<Self> {
260        let mut table = Self {
261            path: path.to_string(),
262            need_teardown: !Self::already_set_up(path)?,
263        };
264        // create/sync partition devices if missing
265        table.reread()?;
266        Ok(table)
267    }
268
269    // We only want to kpartx -d on drop if we're the one initially
270    // creating the partition devices.  There's no good way to detect
271    // this.
272    fn already_set_up(path: &str) -> Result<bool> {
273        let re = Regex::new(r"^p[0-9]+$").expect("compiling RE");
274        let expected = Path::new(path)
275            .file_name()
276            .with_context(|| format!("getting filename of {path}"))?
277            .to_os_string()
278            .into_string()
279            .map_err(|_| anyhow!("converting filename of {}", path))?;
280        for ent in read_dir("/dev/mapper").context("listing /dev/mapper")? {
281            let ent = ent.context("reading /dev/mapper entry")?;
282            let found = ent.file_name().into_string().map_err(|_| {
283                anyhow!(
284                    "converting filename of {}",
285                    Path::new(&ent.file_name()).display()
286                )
287            })?;
288            if found.starts_with(&expected) && re.is_match(&found[expected.len()..]) {
289                return Ok(true);
290            }
291        }
292        Ok(false)
293    }
294
295    fn run_kpartx(&self, flag: &str) -> Result<()> {
296        // Swallow stderr on success.  Avoids spurious warnings:
297        //   GPT:Primary header thinks Alt. header is not at the end of the disk.
298        //   GPT:Alternate GPT header not at the end of the disk.
299        //   GPT: Use GNU Parted to correct GPT errors.
300        //
301        // By default, kpartx waits for udev to settle before returning,
302        // but this blocks indefinitely inside a container.  See e.g.
303        //   https://github.com/moby/moby/issues/22025
304        // Use -n to skip blocking on udev, and then manually settle.
305        runcmd_output!("kpartx", flag, "-n", &self.path)?;
306        udev_settle()?;
307        Ok(())
308    }
309}
310
311impl PartTable for PartTableKpartx {
312    fn reread(&mut self) -> Result<()> {
313        let delay = 1;
314        for _ in 0..4 {
315            match self.run_kpartx("-u") {
316                Ok(()) => return Ok(()),
317                Err(e) => eprintln!("Error: {e}"),
318            }
319            eprintln!("Retrying in {delay} second");
320            sleep(Duration::from_secs(delay));
321        }
322        self.run_kpartx("-u")
323    }
324}
325
326impl Drop for PartTableKpartx {
327    /// If we created the partition devices (rather than finding them
328    /// already existing), delete them afterward so we don't leave DM
329    /// devices attached to the specified disk.
330    fn drop(&mut self) {
331        if self.need_teardown {
332            if let Err(e) = self.run_kpartx("-d") {
333                eprintln!("{e}")
334            }
335        }
336    }
337}
338
/// Description of one partition of a disk.
#[derive(Debug)]
pub struct Partition {
    /// Path of the partition's device node.
    pub path: String,
    /// Label reported for the partition, if any.
    pub label: Option<String>,
    /// Filesystem type reported for the partition, if any.
    pub fstype: Option<String>,

    /// Path of the disk that contains this partition.
    pub parent: String,
    /// Where the partition is mounted, if known.
    pub mountpoint: Option<String>,
    /// Whether the partition is reported as swap.
    pub swap: bool,
}
349
350impl Partition {
351    /// Return start and end offsets within the disk.
352    pub fn get_offsets(path: &str) -> Result<(u64, u64)> {
353        let dev = metadata(path)
354            .with_context(|| format!("getting metadata for {path}"))?
355            .st_rdev();
356        let maj: u64 = major(dev);
357        let min: u64 = minor(dev);
358
359        let start = read_sysfs_dev_block_value_u64(maj, min, "start")?;
360        let size = read_sysfs_dev_block_value_u64(maj, min, "size")?;
361
362        // We multiply by 512 here: the kernel values are always in 512 blocks, regardless of the
363        // actual sector size of the block device. We keep the values as bytes to make things
364        // easier.
365        let start_offset: u64 = start
366            .checked_mul(512)
367            .context("start offset mult overflow")?;
368        let end_offset: u64 = start_offset
369            .checked_add(size.checked_mul(512).context("end offset mult overflow")?)
370            .context("end offset add overflow")?;
371        Ok((start_offset, end_offset))
372    }
373
374    pub fn get_holders(&self) -> Result<Vec<String>> {
375        let holders = self.get_sysfs_dir()?.join("holders");
376        let mut ret: Vec<String> = Vec::new();
377        for ent in read_dir(&holders).with_context(|| format!("reading {}", &holders.display()))? {
378            let ent = ent.with_context(|| format!("reading {} entry", &holders.display()))?;
379            ret.push(format!("/dev/{}", ent.file_name().to_string_lossy()));
380        }
381        Ok(ret)
382    }
383
384    // Try to locate the device directory in sysfs.
385    fn get_sysfs_dir(&self) -> Result<PathBuf> {
386        let basedir = Path::new("/sys/block");
387
388        // First assume we have a regular partition.
389        // /sys/block/sda/sda1
390        let devdir = basedir
391            .join(
392                Path::new(&self.parent)
393                    .file_name()
394                    .with_context(|| format!("parent {} has no filename", self.parent))?,
395            )
396            .join(
397                Path::new(&self.path)
398                    .file_name()
399                    .with_context(|| format!("path {} has no filename", self.path))?,
400            );
401        if devdir.exists() {
402            return Ok(devdir);
403        }
404
405        // Now assume a kpartx "partition", where the path is a symlink to
406        // an unpartitioned DM device node.
407        // /sys/block/dm-1
408        let is_link = symlink_metadata(&self.path)
409            .with_context(|| format!("reading metadata for {}", self.path))?
410            .file_type()
411            .is_symlink();
412        if is_link {
413            let target = canonicalize(&self.path)
414                .with_context(|| format!("getting absolute path to {}", self.path))?;
415            let devdir = basedir.join(
416                target
417                    .file_name()
418                    .with_context(|| format!("target {} has no filename", target.display()))?,
419            );
420            if devdir.exists() {
421                return Ok(devdir);
422            }
423        }
424
425        // Give up
426        bail!(
427            "couldn't find /sys/block directory for partition {} of {}",
428            &self.path,
429            &self.parent
430        );
431    }
432}
433
/// A mounted filesystem: the device it was mounted from and the directory
/// it is mounted on.
#[derive(Debug)]
pub struct Mount {
    /// Device the filesystem is mounted from.
    device: String,
    /// Directory the filesystem is mounted on.
    mountpoint: PathBuf,
    /// Whether we own this mount.
    owned: bool,
}
441
442impl Mount {
443    pub fn try_mount(device: &str, fstype: &str, flags: mount::MsFlags) -> Result<Mount> {
444        let tempdir = tempfile::Builder::new()
445            .prefix("coreos-installer-")
446            .tempdir()
447            .context("creating temporary directory")?;
448        // avoid auto-cleanup of tempdir, which could recursively remove
449        // the partition contents if umount failed
450        let mountpoint = tempdir.keep();
451
452        // Ensure we're in a private mount namespace so the mount isn't
453        // visible to the rest of the system.  Multiple unshare calls
454        // should be safe.
455        sched::unshare(sched::CloneFlags::CLONE_NEWNS).context("unsharing mount namespace")?;
456
457        mount::mount::<str, Path, str, str>(Some(device), &mountpoint, Some(fstype), flags, None)
458            .with_context(|| format!("mounting device {} on {}", device, mountpoint.display()))?;
459
460        Ok(Mount {
461            device: device.to_string(),
462            mountpoint,
463            owned: true,
464        })
465    }
466
467    pub fn from_existing<P: AsRef<Path>>(path: P) -> Result<Mount> {
468        let mounts = read_to_string("/proc/self/mounts").context("reading mount table")?;
469        for line in mounts.lines() {
470            let mount: Vec<&str> = line.split_whitespace().collect();
471            // see https://man7.org/linux/man-pages/man5/fstab.5.html
472            if mount.len() != 6 {
473                bail!("invalid line in /proc/self/mounts: {}", line);
474            }
475            if Path::new(mount[1]) == path.as_ref() {
476                return Ok(Mount {
477                    device: mount[0].to_string(),
478                    mountpoint: path.as_ref().into(),
479                    owned: false,
480                });
481            }
482        }
483        bail!("mountpoint {} not found", path.as_ref().display());
484    }
485
486    pub fn device(&self) -> &str {
487        self.device.as_str()
488    }
489
490    pub fn mountpoint(&self) -> &Path {
491        self.mountpoint.as_path()
492    }
493
494    pub fn get_partition_offsets(&self) -> Result<(u64, u64)> {
495        Partition::get_offsets(&self.device)
496    }
497
498    pub fn get_filesystem_uuid(&self) -> Result<String> {
499        // We used to use lsblk_single, but its cache may be stale after mkfs.
500        // blkid_single doesn't use cache.
501        let devinfo = blkid_single(Path::new(&self.device))?;
502        devinfo
503            .get("UUID")
504            .map(String::from)
505            .with_context(|| format!("filesystem {} has no UUID", self.device))
506    }
507}
508
509impl Drop for Mount {
510    fn drop(&mut self) {
511        if !self.owned {
512            return;
513        }
514
515        // Unmount sometimes fails immediately after closing the last open
516        // file on the partition.  Retry several times before giving up.
517        for retries in (0..20).rev() {
518            match mount::umount(&self.mountpoint) {
519                Ok(_) => break,
520                Err(err) => {
521                    if retries == 0 {
522                        eprintln!("umounting {}: {}", self.device, err);
523                        return;
524                    } else {
525                        sleep(Duration::from_millis(100));
526                    }
527                }
528            }
529        }
530        if let Err(err) = remove_dir(&self.mountpoint) {
531            eprintln!("removing {}: {}", self.mountpoint.display(), err);
532        }
533    }
534}
535
536#[derive(Debug)]
537pub struct SavedPartitions {
538    sector_size: u64,
539    partitions: Vec<(u32, GPTPartitionEntry)>,
540}
541
542impl SavedPartitions {
543    /// Create a SavedPartitions for a block device with a sector size.
544    pub fn new_from_disk(disk: &mut File, filters: &[PartitionFilter]) -> Result<Self> {
545        if !disk
546            .metadata()
547            .context("getting disk metadata")?
548            .file_type()
549            .is_block_device()
550        {
551            bail!("specified file is not a block device");
552        }
553        Self::new(disk, get_sector_size(disk)?.get() as u64, filters)
554    }
555
556    /// Create a SavedPartitions for a file with a specified imputed sector
557    /// size.  Useful for unit tests, and fails on a real disk.
558    #[cfg(test)]
559    pub fn new_from_file(
560        disk: &mut File,
561        sector_size: u64,
562        filters: &[PartitionFilter],
563    ) -> Result<Self> {
564        if disk
565            .metadata()
566            .context("getting disk metadata")?
567            .file_type()
568            .is_block_device()
569        {
570            bail!("called new_from_file() on a block device");
571        }
572        match sector_size {
573            512 | 4096 => (),
574            _ => bail!("specified unreasonable sector size {}", sector_size),
575        }
576        Self::new(disk, sector_size, filters)
577    }
578
579    fn new(disk: &mut File, sector_size: u64, filters: &[PartitionFilter]) -> Result<Self> {
580        // if there are no filters, ignore existing GPT, since we're going to
581        // overwrite it
582        if filters.is_empty() {
583            return Ok(Self {
584                sector_size,
585                partitions: Vec::new(),
586            });
587        }
588
589        // read GPT
590        let gpt = match GPT::find_from(disk) {
591            Ok(gpt) => gpt,
592            Err(gptman::Error::InvalidSignature) => {
593                // ensure no indexes are listed to be saved from a MBR disk
594                // we don't need to check for labels since MBR does not support them
595                if filters
596                    .iter()
597                    .any(|f| matches!(f, PartitionFilter::Index(_, _)))
598                    && disk_has_mbr(disk).context("checking if disk has an MBR")?
599                {
600                    bail!("saving partitions from an MBR disk is not yet supported");
601                }
602
603                // no GPT on this disk, so no partitions to save
604                return Ok(Self {
605                    sector_size,
606                    partitions: Vec::new(),
607                });
608            }
609            Err(e) => return Err(e).context("reading partition table"),
610        };
611
612        // cross-check GPT sector size
613        Self::verify_gpt_sector_size(&gpt, sector_size)?;
614
615        // save partitions accepted by filters
616        let mut partitions = Vec::new();
617        for (i, p) in gpt.iter() {
618            if Self::matches_filters(i, p, filters) {
619                partitions.push((i, p.clone()));
620            }
621        }
622        let result = Self {
623            sector_size,
624            partitions,
625        };
626
627        // Test restoring the saved partitions to a temporary file.  If the
628        // resulting partition table contains invalid data (e.g. duplicate
629        // partition GUIDs) we need to know now, before the caller
630        // overwrites the partition table.  Otherwise we could fail to
631        // restore, clear the table, and fail to restore _again_ to the
632        // empty table.
633        if !result.partitions.is_empty() {
634            let len = disk.seek(SeekFrom::End(0)).context("getting disk size")?;
635            let mut temp = tempfile::tempfile().context("creating dry run image")?;
636            temp.set_len(len)
637                .with_context(|| format!("setting test image size to {len}"))?;
638            result.overwrite(&mut temp).context(
639                "failed dry run restoring saved partitions; input partition table may be invalid",
640            )?;
641        }
642
643        Ok(result)
644    }
645
646    fn verify_disk_sector_size(&self, disk: &File) -> Result<()> {
647        if !disk
648            .metadata()
649            .context("getting disk metadata")?
650            .file_type()
651            .is_block_device()
652        {
653            return Ok(());
654        }
655        let disk_sector_size = get_sector_size(disk)?.get() as u64;
656        if disk_sector_size != self.sector_size {
657            bail!(
658                "disk sector size {} doesn't match expected {}",
659                disk_sector_size,
660                self.sector_size
661            );
662        }
663        Ok(())
664    }
665
666    fn verify_gpt_sector_size(gpt: &GPT, sector_size: u64) -> Result<()> {
667        if gpt.sector_size != sector_size {
668            bail!(
669                "GPT sector size {} doesn't match expected {}",
670                gpt.sector_size,
671                sector_size
672            );
673        }
674        Ok(())
675    }
676
677    fn matches_filters(i: u32, p: &GPTPartitionEntry, filters: &[PartitionFilter]) -> bool {
678        use PartitionFilter::*;
679        if !p.is_used() {
680            return false;
681        }
682        filters.iter().any(|f| match f {
683            Index(Some(first), _) if first.get() > i => false,
684            Index(_, Some(last)) if last.get() < i => false,
685            Index(_, _) => true,
686            Label(glob) if glob.matches(p.partition_name.as_str()) => true,
687            _ => false,
688        })
689    }
690
691    /// Unconditionally write the saved partitions, and only the saved
692    /// partitions, to the disk.  Write a protective MBR and overwrite any
693    /// MBR boot code.  Updating the kernel partition table is the caller's
694    /// responsibility.
695    pub fn overwrite(&self, disk: &mut File) -> Result<()> {
696        // create GPT
697        self.verify_disk_sector_size(disk)?;
698        let mut gpt = GPT::new_from(disk, self.sector_size, *Uuid::new_v4().as_bytes())
699            .context("creating new GPT")?;
700
701        // add partitions
702        for (i, p) in &self.partitions {
703            gpt[*i] = p.clone();
704        }
705
706        // write GPT
707        gpt.write_into(disk).context("writing new GPT")?;
708
709        // Overwrite only the parts of the MBR that don't contain the
710        // partition table, then write protective MBR.  This ensures that
711        // there's no time window without an MBR, during which the kernel
712        // would refuse to read the GPT.
713        disk.rewind().context("seeking to MBR")?;
714        disk.write(&[0u8; 446])
715            .context("overwriting MBR boot code")?;
716        if self.sector_size > 512 {
717            disk.seek(SeekFrom::Start(512))
718                .context("seeking to end of MBR")?;
719            disk.write(&vec![0u8; self.sector_size as usize - 512])
720                .context("overwriting end of MBR")?;
721        }
722        GPT::write_protective_mbr_into(disk, self.sector_size).context("writing protective MBR")?;
723
724        Ok(())
725    }
726
727    /// If any partitions are saved, merge them into the GPT from source,
728    /// which must be valid, and write a protective MBR with the correct
729    /// protective partition size.  Updating the kernel partition table is
730    /// the caller's responsibility.
731    pub fn merge(&self, source: &mut (impl Read + Seek), disk: &mut File) -> Result<()> {
732        if self.partitions.is_empty() {
733            return Ok(());
734        }
735
736        // read GPT
737        self.verify_disk_sector_size(disk)?;
738        let mut gpt =
739            GPT::find_from(source).context("couldn't read partition table from source")?;
740        Self::verify_gpt_sector_size(&gpt, self.sector_size)?;
741        // The GPT thinks the disk is the size of the install image.
742        // Update sizing.
743        gpt.header
744            .update_from(disk, self.sector_size)
745            .context("updating GPT header")?;
746
747        // merge saved partitions into partition table
748        // find partition number one larger than the largest used one
749        let mut next = gpt
750            .iter()
751            .fold(1, |prev, (i, e)| if e.is_used() { i + 1 } else { prev });
752        for (i, p) in &self.partitions {
753            // use the next partition number in the sequence if we have to,
754            // or the partition's original number if it's larger
755            next = next.max(*i);
756            eprintln!(
757                "Saving partition {} (\"{}\") to new partition {}",
758                i, p.partition_name, next
759            );
760            gpt[next] = p.clone();
761            next += 1;
762        }
763
764        // write
765        gpt.write_into(disk).context("writing updated GPT")?;
766
767        // update protective partition size
768        GPT::write_protective_mbr_into(disk, self.sector_size).context("writing protective MBR")?;
769
770        Ok(())
771    }
772
773    /// Get the sector size in use for this partition table.
774    pub fn get_sector_size(&self) -> u64 {
775        self.sector_size
776    }
777
778    /// Get the byte offset of the first byte not to be overwritten, if any,
779    /// plus a description of the partition at that offset.
780    pub fn get_offset(&self) -> Result<Option<(u64, String)>> {
781        match self.partitions.iter().min_by_key(|(_, p)| p.starting_lba) {
782            None => Ok(None),
783            Some((i, p)) => Ok(Some((
784                p.starting_lba
785                    .checked_mul(self.sector_size)
786                    .context("overflow calculating partition start")?,
787                format!("partition {} (\"{}\")", i, p.partition_name.as_str()),
788            ))),
789        }
790    }
791
792    pub fn is_saved(&self) -> bool {
793        !self.partitions.is_empty()
794    }
795}
796
797fn read_sysfs_dev_block_value_u64(maj: u64, min: u64, field: &str) -> Result<u64> {
798    let s = read_sysfs_dev_block_value(maj, min, field)
799        .with_context(|| format!("reading partition {maj}:{min} {field} value from sysfs"))?;
800    s.parse().with_context(|| {
801        format!(
802            "parsing partition {}:{} {} value \"{}\" as u64",
803            maj, min, field, &s
804        )
805    })
806}
807
808fn read_sysfs_dev_block_value(maj: u64, min: u64, field: &str) -> Result<String> {
809    let path = PathBuf::from(format!("/sys/dev/block/{maj}:{min}/{field}"));
810    Ok(read_to_string(path)?.trim_end().into())
811}
812
813pub fn lsblk_single(dev: &Path) -> Result<HashMap<String, String>> {
814    let mut devinfos = lsblk(Path::new(dev), false)?;
815    if devinfos.is_empty() {
816        // this should never happen because `lsblk` itself would've failed
817        bail!("no lsblk results for {}", dev.display());
818    }
819    Ok(devinfos.remove(0))
820}
821
822pub fn blkid_single(dev: &Path) -> Result<HashMap<String, String>> {
823    let mut devinfos = blkid(Some(vec![dev]))?;
824    if devinfos.len() != 1 {
825        bail!(
826            "expected one blkid result for {}, got: {:?}",
827            dev.display(),
828            devinfos
829        );
830    }
831    Ok(devinfos.remove(0))
832}
833/// Returns all available filesystems.
834/// rereadpt mitigates possible issue with outdated UUIDs on different
835/// paths to the same disk: after 'ignition-ostree-firstboot-uuid'
836/// '/dev/sdaX' path gets new UUID, but '/dev/sdbX/' path has an old one
837fn get_all_filesystems(rereadpt: bool) -> Result<Vec<HashMap<String, String>>> {
838    if rereadpt {
839        let mut cmd = Command::new("lsblk");
840        cmd.arg("--noheadings")
841            .arg("--nodeps")
842            .arg("--list")
843            .arg("--paths")
844            .arg("--output")
845            .arg("NAME");
846        let output = cmd_output(&mut cmd)?;
847        for dev in output.lines() {
848            if let Ok(mut fd) = std::fs::File::open(dev) {
849                // best-effort reread of disk that may have busy partitions; don't retry
850                let _ = reread_partition_table(&mut fd, false);
851            }
852        }
853        udev_settle()?;
854    }
855    blkid(None)
856}
857
858/// Returns filesystems with given label.
859/// If multiple filesystems with the label have the same UUID, we only return one of them.
860pub fn get_filesystems_with_label(label: &str, rereadpt: bool) -> Result<Vec<String>> {
861    let mut uuids = HashSet::new();
862    let result = get_all_filesystems(rereadpt)?
863        .iter()
864        .filter(|v| v.get("LABEL").map(|l| l.as_str()) == Some(label))
865        .filter(|v| match v.get("UUID") {
866            Some(uuid) => {
867                if !uuid.is_empty() {
868                    uuids.insert(uuid)
869                } else {
870                    true
871                }
872            }
873            None => true,
874        })
875        .filter_map(|v| v.get("NAME").map(<_>::to_owned))
876        .collect();
877    Ok(result)
878}
879
880pub fn lsblk(dev: &Path, with_deps: bool) -> Result<Vec<HashMap<String, String>>> {
881    let mut cmd = Command::new("lsblk");
882    // Older lsblk, e.g. in CentOS 7.6, doesn't support PATH, but --paths option
883    cmd.arg("--pairs")
884        .arg("--paths")
885        .arg("--output")
886        .arg("NAME,LABEL,FSTYPE,TYPE,MOUNTPOINT,UUID")
887        .arg(dev);
888    if !with_deps {
889        cmd.arg("--nodeps");
890    }
891    let output = cmd_output(&mut cmd)?;
892    let mut result: Vec<HashMap<String, String>> = Vec::new();
893    for line in output.lines() {
894        // parse key-value pairs
895        result.push(split_lsblk_line(line));
896    }
897    Ok(result)
898}
899
900/// Parse key-value pairs from blkid.
901fn split_blkid_line(line: &str) -> HashMap<String, String> {
902    let (name, data) = match line.find(':') {
903        Some(n) => line.split_at(n),
904        None => return HashMap::new(),
905    };
906
907    let (name, data) = (name.trim(), data[1..].trim());
908    if name.is_empty() {
909        return HashMap::new();
910    }
911
912    let mut fields = split_lsblk_line(data);
913    fields.insert("NAME".to_string(), name.to_string());
914    fields
915}
916
917fn blkid(devices: Option<Vec<&Path>>) -> Result<Vec<HashMap<String, String>>> {
918    // Run blkid with a clean cache to avoid collecting old devices which no
919    // longer exist.
920    // https://github.com/coreos/coreos-installer/pull/1288#discussion_r1312008111
921
922    // Run once to gather the list of devices, which we need to specify for
923    // the blkid -p below, which we use to probe the devices to not rely on
924    // the blkid cache:
925    // https://github.com/coreos/fedora-coreos-config/pull/2181#issuecomment-1397386896
926    let found_devices; // need this for &Path refs to outlive the else block
927    let devices: Vec<&Path> = if let Some(paths) = devices {
928        paths
929    } else {
930        found_devices = {
931            let mut cmd = Command::new("blkid");
932            cmd.arg("--cache-file");
933            cmd.arg("/dev/null");
934            cmd.arg("-o");
935            cmd.arg("device");
936            cmd_output(&mut cmd)?
937        };
938        found_devices
939            .lines()
940            .map(|line| Path::new(line.trim()))
941            .collect()
942    };
943    let output = {
944        let mut cmd = Command::new("blkid");
945        cmd.arg("-p");
946        cmd.args(devices);
947        cmd_output(&mut cmd)?
948    };
949
950    let mut result: Vec<HashMap<String, String>> = Vec::new();
951    for line in output.lines() {
952        result.push(split_blkid_line(line));
953    }
954    Ok(result)
955}
956
957/// This is a bit fuzzy, but... this function will return every block device in the parent
958/// hierarchy of `device` capable of containing other partitions. So e.g. parent devices of type
959/// "part" doesn't match, but "disk" and "mpath" does.
960pub fn find_parent_devices(device: &str) -> Result<Vec<String>> {
961    let mut cmd = Command::new("lsblk");
962    // Older lsblk, e.g. in CentOS 7.6, doesn't support PATH, but --paths option
963    cmd.arg("--pairs")
964        .arg("--paths")
965        .arg("--inverse")
966        .arg("--output")
967        .arg("NAME,TYPE")
968        .arg(device);
969    let output = cmd_output(&mut cmd)?;
970    let mut parents = Vec::new();
971    // skip first line, which is the device itself
972    for line in output.lines().skip(1) {
973        let dev = split_lsblk_line(line);
974        let name = dev
975            .get("NAME")
976            .with_context(|| format!("device in hierarchy of {device} missing NAME"))?;
977        let kind = dev
978            .get("TYPE")
979            .with_context(|| format!("device in hierarchy of {device} missing TYPE"))?;
980        if kind == "disk" {
981            parents.push(name.clone());
982        } else if kind == "mpath" {
983            parents.push(name.clone());
984            // we don't need to know what disks back the multipath
985            break;
986        }
987    }
988    if parents.is_empty() {
989        bail!("no parent devices found for {}", device);
990    }
991    Ok(parents)
992}
993
994/// Find ESP partitions which sit at the same hierarchy level as `device`.
995pub fn find_colocated_esps(device: &str) -> Result<Vec<String>> {
996    const ESP_TYPE_GUID: &str = "c12a7328-f81f-11d2-ba4b-00a0c93ec93b";
997
998    // first, get the parent device
999    let parent_devices = find_parent_devices(device)
1000        .with_context(|| format!("while looking for colocated ESPs of '{device}'"))?;
1001
1002    // now, look for all ESPs on those devices
1003    let mut esps = Vec::new();
1004    for parent_device in parent_devices {
1005        let mut cmd = Command::new("lsblk");
1006        // Older lsblk, e.g. in CentOS 7.6, doesn't support PATH, but --paths option
1007        cmd.arg("--pairs")
1008            .arg("--paths")
1009            .arg("--output")
1010            .arg("NAME,PARTTYPE")
1011            .arg(parent_device);
1012        for line in cmd_output(&mut cmd)?.lines() {
1013            let dev = split_lsblk_line(line);
1014            if dev.get("PARTTYPE").map(|t| t.as_str()) == Some(ESP_TYPE_GUID) {
1015                esps.push(
1016                    dev.get("NAME")
1017                        .cloned()
1018                        .context("ESP device with missing NAME")?,
1019                )
1020            }
1021        }
1022    }
1023    Ok(esps)
1024}
1025
1026/// This is basically a Rust version of:
1027/// https://github.com/coreos/coreos-assembler/blob/d3c7ec094a02/src/cmd-buildextend-live#L492-L495
1028pub fn find_efi_vendor_dir(efi_mount: &Mount) -> Result<PathBuf> {
1029    let p = efi_mount.mountpoint().join("EFI");
1030    let mut vendor_dir: Vec<PathBuf> = Vec::new();
1031    for ent in p.read_dir()? {
1032        let ent = ent.with_context(|| format!("reading directory entry in {}", p.display()))?;
1033        if !ent.file_type()?.is_dir() {
1034            continue;
1035        }
1036        let path = ent.path();
1037        if path.join("grub.cfg").is_file() {
1038            vendor_dir.push(path);
1039        }
1040    }
1041    if vendor_dir.len() != 1 {
1042        bail!(
1043            "Expected one vendor dir on {}, got {} ({:?})",
1044            efi_mount.device(),
1045            vendor_dir.len(),
1046            vendor_dir,
1047        );
1048    }
1049    Ok(vendor_dir.pop().unwrap())
1050}
1051
1052/// Parse key-value pairs from lsblk --pairs.
1053/// Newer versions of lsblk support JSON but the one in CentOS 7 doesn't.
1054fn split_lsblk_line(line: &str) -> HashMap<String, String> {
1055    lazy_static! {
1056        static ref REGEX: Regex = Regex::new(r#"([A-Z-_]+)="([^"]+)""#).unwrap();
1057    }
1058    let mut fields: HashMap<String, String> = HashMap::new();
1059    for cap in REGEX.captures_iter(line) {
1060        fields.insert(cap[1].to_string(), cap[2].to_string());
1061    }
1062    fields
1063}
1064
1065pub fn get_blkdev_deps(device: &Path) -> Result<Vec<PathBuf>> {
1066    let deps = {
1067        let mut p = PathBuf::from("/sys/block");
1068        p.push(
1069            device
1070                .canonicalize()
1071                .with_context(|| format!("canonicalizing {}", device.display()))?
1072                .file_name()
1073                .with_context(|| format!("path {} has no filename", device.display()))?,
1074        );
1075        p.push("slaves");
1076        p
1077    };
1078    let mut ret: Vec<PathBuf> = Vec::new();
1079    let dir_iter = match read_dir(&deps) {
1080        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(ret),
1081        Err(e) => return Err(e).with_context(|| format!("reading dir {}", &deps.display())),
1082        Ok(it) => it,
1083    };
1084    for ent in dir_iter {
1085        let ent = ent.with_context(|| format!("reading {} entry", &deps.display()))?;
1086        ret.push(Path::new("/dev").join(ent.file_name()));
1087    }
1088    Ok(ret)
1089}
1090
1091pub fn get_blkdev_deps_recursing(device: &Path) -> Result<Vec<PathBuf>> {
1092    let mut ret: Vec<PathBuf> = Vec::new();
1093    for dep in get_blkdev_deps(device)? {
1094        ret.extend(get_blkdev_deps_recursing(&dep)?);
1095        ret.push(dep);
1096    }
1097    Ok(ret)
1098}
1099
1100fn reread_partition_table(file: &mut File, retry: bool) -> Result<()> {
1101    let fd = file.as_raw_fd();
1102    // Reread sometimes fails inexplicably.  Retry several times before
1103    // giving up.
1104    let max_tries = if retry { 20 } else { 1 };
1105    for retries in (0..max_tries).rev() {
1106        let result = unsafe { ioctl::blkrrpart(fd) };
1107        match result {
1108            Ok(_) => break,
1109            Err(err) if retries == 0 && err == Errno::EINVAL => {
1110                return Err(err)
1111                    .context("couldn't reread partition table: device may not support partitions")
1112            }
1113            Err(err) if retries == 0 && err == Errno::EBUSY => {
1114                return Err(err).context("couldn't reread partition table: device is in use")
1115            }
1116            Err(err) if retries == 0 => return Err(err).context("couldn't reread partition table"),
1117            Err(_) => sleep(Duration::from_millis(100)),
1118        }
1119    }
1120    Ok(())
1121}
1122
1123/// Get the sector size of the block device at a given path.
1124pub fn get_sector_size_for_path(device: &Path) -> Result<NonZeroU32> {
1125    let dev = OpenOptions::new()
1126        .read(true)
1127        .open(device)
1128        .with_context(|| format!("opening {device:?}"))?;
1129
1130    if !dev
1131        .metadata()
1132        .with_context(|| format!("getting metadata for {device:?}"))?
1133        .file_type()
1134        .is_block_device()
1135    {
1136        bail!("{:?} is not a block device", device);
1137    }
1138
1139    get_sector_size(&dev)
1140}
1141
1142/// Get the logical sector size of a block device.
1143pub fn get_sector_size(file: &File) -> Result<NonZeroU32> {
1144    let fd = file.as_raw_fd();
1145    let mut size: c_int = 0;
1146    match unsafe { ioctl::blksszget(fd, &mut size) } {
1147        Ok(_) => {
1148            let size_u32: u32 = size
1149                .try_into()
1150                .with_context(|| format!("sector size {size} doesn't fit in u32"))?;
1151            NonZeroU32::new(size_u32).context("found sector size of zero")
1152        }
1153        Err(e) => Err(anyhow!(e).context("getting sector size")),
1154    }
1155}
1156
1157/// Get the size of a block device.
1158pub fn get_block_device_size(file: &File) -> Result<NonZeroU64> {
1159    let fd = file.as_raw_fd();
1160    let mut size: libc::size_t = 0;
1161    match unsafe { ioctl::blkgetsize64(fd, &mut size) } {
1162        // just cast using `as`: there is no platform we care about today where size_t > 64bits
1163        Ok(_) => NonZeroU64::new(size as u64).context("found block size of zero"),
1164        Err(e) => Err(anyhow!(e).context("getting block size")),
1165    }
1166}
1167
1168/// Get the size of the GPT metadata at the start of the disk.
1169pub fn get_gpt_size(file: &mut (impl Read + Seek)) -> Result<u64> {
1170    let gpt = GPT::find_from(file).context("reading GPT")?;
1171    Ok(gpt.header.first_usable_lba * gpt.sector_size)
1172}
1173
1174fn disk_has_mbr(file: &mut (impl Read + Seek)) -> Result<bool> {
1175    let mut sig = [0u8; 2];
1176    file.seek(SeekFrom::Start(510))
1177        .context("seeking to MBR signature")?;
1178    file.read_exact(&mut sig).context("reading MBR signature")?;
1179    Ok(sig == [0x55, 0xaa])
1180}
1181
/// Reports whether the udev control socket `/run/udev/control` exists.
pub fn have_udev() -> bool {
    metadata("/run/udev/control").is_ok()
}
1185
1186pub fn udev_settle() -> Result<()> {
1187    // "udevadm settle" silently no-ops if the udev socket is missing, and
1188    // then lsblk can't find partition labels.  Catch this early.
1189    if !have_udev() {
1190        bail!("udevd socket missing; are we running in a container without /run/udev mounted?");
1191    }
1192
1193    // There's a potential window after rereading the partition table where
1194    // udevd hasn't yet received updates from the kernel, settle will return
1195    // immediately, and lsblk won't pick up partition labels.  Try to sleep
1196    // our way out of this.
1197    sleep(Duration::from_millis(200));
1198
1199    runcmd!("udevadm", "settle")?;
1200    Ok(())
1201}
1202
/// Inspect a buffer from the start of a disk image and return its formatted
/// sector size, if any can be determined.
///
/// The GPT magic `EFI PART` is looked for at offset 512 and then at offset
/// 4096; the first offset where it is found is the sector size. Returns
/// `None` if the magic is at neither offset or the buffer is too short.
pub fn detect_formatted_sector_size(buf: &[u8]) -> Option<NonZeroU32> {
    const GPT_MAGIC: &[u8] = b"EFI PART";
    // candidate sector sizes, probed in this order
    const CANDIDATES: [u32; 2] = [512, 4096];

    CANDIDATES
        .iter()
        .copied()
        .find(|&sector| {
            let start = sector as usize;
            // `get` yields None when the buffer ends before the magic would
            buf.get(start..start + GPT_MAGIC.len()) == Some(GPT_MAGIC)
        })
        .and_then(NonZeroU32::new)
}
1219
1220/// Checks if underlying device is IBM DASD disk
1221pub fn is_dasd(device: &str, fd: Option<&mut File>) -> Result<bool> {
1222    let target =
1223        canonicalize(device).with_context(|| format!("getting absolute path to {device}"))?;
1224    if target.to_string_lossy().starts_with("/dev/dasd") {
1225        return Ok(true);
1226    }
1227    let read_magic = |device: &str, disk: &mut File| -> Result<[u8; 4]> {
1228        let offset = disk
1229            .stream_position()
1230            .with_context(|| format!("saving offset {device}"))?;
1231        disk.seek(SeekFrom::Start(8194))
1232            .with_context(|| format!("seeking {device}"))?;
1233        let mut lbl = [0u8; 4];
1234        disk.read_exact(&mut lbl)
1235            .with_context(|| format!("reading label {device}"))?;
1236        disk.seek(SeekFrom::Start(offset))
1237            .with_context(|| format!("restoring offset {device}"))?;
1238        Ok(lbl)
1239    };
1240    if target.to_string_lossy().starts_with("/dev/vd") {
1241        let cdl_magic = [0xd3, 0xf1, 0xe5, 0xd6];
1242        let lbl = if let Some(t) = fd {
1243            read_magic(device, t)?
1244        } else {
1245            let mut disk = File::open(device).with_context(|| format!("opening {device}"))?;
1246            read_magic(device, &mut disk)?
1247        };
1248        return Ok(cdl_magic == lbl);
1249    }
1250    Ok(false)
1251}
1252
// create unsafe ioctl wrappers
//
// Each nix macro below generates an `unsafe fn` named after its first
// argument. The generated functions carry no safety docs of their own,
// hence the clippy allow.
#[allow(clippy::missing_safety_doc)]
mod ioctl {
    use super::c_int;
    use nix::{ioctl_none, ioctl_read, ioctl_read_bad, request_code_none};
    // blkrrpart(fd): no data argument; used to reread a partition table.
    // NOTE(review): presumably the kernel's BLKRRPART request - confirm
    // against linux/fs.h.
    ioctl_none!(blkrrpart, 0x12, 95);
    // blksszget(fd, *mut c_int): used to query the logical sector size.
    // The request code is built with request_code_none!, i.e. without a
    // direction or size, even though a c_int is read back.
    // NOTE(review): presumably BLKSSZGET - confirm against linux/fs.h.
    ioctl_read_bad!(blksszget, request_code_none!(0x12, 104), c_int);
    // blkgetsize64(fd, *mut size_t): used to query the device size.
    // NOTE(review): presumably BLKGETSIZE64 - confirm against linux/fs.h.
    ioctl_read!(blkgetsize64, 0x12, 114, libc::size_t);
}
1262
1263#[cfg(test)]
1264mod tests {
1265    use super::*;
1266    use maplit::hashmap;
1267    use std::io::copy;
1268    use tempfile::tempfile;
1269    use xz2::read::XzDecoder;
1270
1271    #[test]
1272    fn lsblk_split() {
1273        assert_eq!(
1274            split_lsblk_line(r#"NAME="sda" LABEL="" FSTYPE="""#),
1275            hashmap! {
1276                String::from("NAME") => String::from("sda"),
1277            }
1278        );
1279        assert_eq!(
1280            split_lsblk_line(r#"NAME="sda1" LABEL="" FSTYPE="vfat""#),
1281            hashmap! {
1282                String::from("NAME") => String::from("sda1"),
1283                String::from("FSTYPE") => String::from("vfat")
1284            }
1285        );
1286        assert_eq!(
1287            split_lsblk_line(r#"NAME="sda2" LABEL="boot" FSTYPE="ext4""#),
1288            hashmap! {
1289                String::from("NAME") => String::from("sda2"),
1290                String::from("LABEL") => String::from("boot"),
1291                String::from("FSTYPE") => String::from("ext4"),
1292            }
1293        );
1294        assert_eq!(
1295            split_lsblk_line(r#"NAME="sda3" LABEL="foo=\x22bar\x22 baz" FSTYPE="ext4""#),
1296            hashmap! {
1297                String::from("NAME") => String::from("sda3"),
1298                // for now, we don't care about resolving lsblk's hex escapes,
1299                // so we just pass them through
1300                String::from("LABEL") => String::from(r#"foo=\x22bar\x22 baz"#),
1301                String::from("FSTYPE") => String::from("ext4"),
1302            }
1303        );
1304    }
1305
1306    #[test]
1307    fn blkid_split() {
1308        assert_eq!(split_blkid_line(r#""#), std::collections::HashMap::new());
1309        assert_eq!(split_blkid_line(r#" : "#), std::collections::HashMap::new());
1310
1311        assert_eq!(
1312            split_blkid_line(r#": UUID="0000""#),
1313            std::collections::HashMap::new()
1314        );
1315
1316        assert_eq!(
1317            split_blkid_line(r#"/dev/empty:"#),
1318            hashmap! {
1319                String::from("NAME") => String::from("/dev/empty")
1320            }
1321        );
1322
1323        assert_eq!(
1324            split_blkid_line(
1325                r#"/dev/mapper/luks-f022921b-0100-4d48-9812-cfa6c225060a: UUID="2ff16ac3-103f-41d4-8e02-03686e255270" BLOCK_SIZE="4096" TYPE="ext4""#
1326            ),
1327            hashmap! {
1328                String::from("NAME") => String::from("/dev/mapper/luks-f022921b-0100-4d48-9812-cfa6c225060a"),
1329                String::from("UUID") => String::from("2ff16ac3-103f-41d4-8e02-03686e255270"),
1330                String::from("TYPE") => String::from("ext4"),
1331                String::from("BLOCK_SIZE") => String::from("4096")
1332            }
1333        );
1334
1335        assert_eq!(
1336            split_blkid_line(
1337                r#"/dev/vdb4: UUID="fdc69fb1-d7f3-4696-846e-b2275504f63c" LABEL="crypt_rootfs" TYPE="crypto_LUKS" PARTLABEL="root" PARTUUID="835753cb-d7f0-465e-84db-07860d3da2f6""#
1338            ),
1339            hashmap! {
1340                String::from("NAME") => String::from("/dev/vdb4"),
1341                String::from("LABEL") => String::from("crypt_rootfs"),
1342                String::from("UUID") => String::from("fdc69fb1-d7f3-4696-846e-b2275504f63c"),
1343                String::from("TYPE") => String::from("crypto_LUKS"),
1344                String::from("PARTLABEL") => String::from("root"),
1345                String::from("PARTUUID") => String::from("835753cb-d7f0-465e-84db-07860d3da2f6"),
1346            }
1347        );
1348    }
1349
1350    #[test]
1351    fn disk_sector_size_reader() {
1352        struct Test {
1353            name: &'static str,
1354            data: &'static [u8],
1355            compressed: bool,
1356            result: Option<NonZeroU32>,
1357        }
1358        let tests = vec![
1359            Test {
1360                name: "zero-length",
1361                data: b"",
1362                compressed: false,
1363                result: None,
1364            },
1365            Test {
1366                name: "empty-disk",
1367                data: include_bytes!("../fixtures/empty.xz"),
1368                compressed: true,
1369                result: None,
1370            },
1371            Test {
1372                name: "gpt-512",
1373                data: include_bytes!("../fixtures/gpt-512.xz"),
1374                compressed: true,
1375                result: NonZeroU32::new(512),
1376            },
1377            Test {
1378                name: "gpt-4096",
1379                data: include_bytes!("../fixtures/gpt-4096.xz"),
1380                compressed: true,
1381                result: NonZeroU32::new(4096),
1382            },
1383        ];
1384
1385        for test in tests {
1386            let data = if test.compressed {
1387                let mut decoder = XzDecoder::new(test.data);
1388                let mut data: Vec<u8> = Vec::new();
1389                decoder.read_to_end(&mut data).expect("decompress failed");
1390                data
1391            } else {
1392                test.data.to_vec()
1393            };
1394            assert_eq!(
1395                detect_formatted_sector_size(&data),
1396                test.result,
1397                "{}",
1398                test.name
1399            );
1400        }
1401    }
1402
1403    #[test]
1404    fn test_saved_partitions() {
1405        use PartitionFilter::*;
1406
1407        let make_part = |i: u32, name: &str, start: u64, end: u64| {
1408            (
1409                i,
1410                GPTPartitionEntry {
1411                    partition_type_guid: make_guid("type"),
1412                    unique_partition_guid: make_guid(&format!("{name} {start} {end}")),
1413                    starting_lba: start * 2048,
1414                    ending_lba: end * 2048 - 1,
1415                    attribute_bits: 0,
1416                    partition_name: name.into(),
1417                },
1418            )
1419        };
1420
1421        let base_parts = vec![
1422            make_part(1, "one", 1, 1024),
1423            make_part(2, "two", 1024, 2048),
1424            make_part(3, "three", 2048, 3072),
1425            make_part(4, "four", 3072, 4096),
1426            make_part(5, "five", 4096, 5120),
1427            make_part(7, "seven", 5120, 6144),
1428            make_part(8, "eight", 6144, 7168),
1429            make_part(9, "nine", 7168, 8192),
1430            make_part(10, "", 8192, 8193),
1431            make_part(11, "", 8193, 8194),
1432        ];
1433        let image_parts = vec![
1434            make_part(1, "boot", 1, 384),
1435            make_part(2, "EFI-SYSTEM", 384, 512),
1436            make_part(4, "root", 1024, 2200),
1437        ];
1438        let merge_base_parts = vec![make_part(2, "unused", 500, 3500)];
1439
1440        let index = |i| Some(NonZeroU32::new(i).unwrap());
1441        let label = |l| Label(glob::Pattern::new(l).unwrap());
1442        let tests = vec![
1443            // Partition range
1444            (
1445                vec![Index(index(5), None)],
1446                vec![
1447                    make_part(5, "five", 4096, 5120),
1448                    make_part(7, "seven", 5120, 6144),
1449                    make_part(8, "eight", 6144, 7168),
1450                    make_part(9, "nine", 7168, 8192),
1451                    make_part(10, "", 8192, 8193),
1452                    make_part(11, "", 8193, 8194),
1453                ],
1454                vec![
1455                    make_part(1, "boot", 1, 384),
1456                    make_part(2, "EFI-SYSTEM", 384, 512),
1457                    make_part(4, "root", 1024, 2200),
1458                    make_part(5, "five", 4096, 5120),
1459                    make_part(7, "seven", 5120, 6144),
1460                    make_part(8, "eight", 6144, 7168),
1461                    make_part(9, "nine", 7168, 8192),
1462                    make_part(10, "", 8192, 8193),
1463                    make_part(11, "", 8193, 8194),
1464                ],
1465            ),
1466            // Glob
1467            (
1468                vec![label("*i*")],
1469                vec![
1470                    make_part(5, "five", 4096, 5120),
1471                    make_part(8, "eight", 6144, 7168),
1472                    make_part(9, "nine", 7168, 8192),
1473                ],
1474                vec![
1475                    make_part(1, "boot", 1, 384),
1476                    make_part(2, "EFI-SYSTEM", 384, 512),
1477                    make_part(4, "root", 1024, 2200),
1478                    make_part(5, "five", 4096, 5120),
1479                    make_part(8, "eight", 6144, 7168),
1480                    make_part(9, "nine", 7168, 8192),
1481                ],
1482            ),
1483            // Missing label, single partition, irrelevant range
1484            (
1485                vec![
1486                    label("six"),
1487                    Index(index(7), index(7)),
1488                    Index(index(15), None),
1489                ],
1490                vec![make_part(7, "seven", 5120, 6144)],
1491                vec![
1492                    make_part(1, "boot", 1, 384),
1493                    make_part(2, "EFI-SYSTEM", 384, 512),
1494                    make_part(4, "root", 1024, 2200),
1495                    make_part(7, "seven", 5120, 6144),
1496                ],
1497            ),
1498            // Empty label match, multiple results
1499            (
1500                vec![label("")],
1501                vec![make_part(10, "", 8192, 8193), make_part(11, "", 8193, 8194)],
1502                vec![
1503                    make_part(1, "boot", 1, 384),
1504                    make_part(2, "EFI-SYSTEM", 384, 512),
1505                    make_part(4, "root", 1024, 2200),
1506                    make_part(10, "", 8192, 8193),
1507                    make_part(11, "", 8193, 8194),
1508                ],
1509            ),
1510            // Partition renumbering
1511            (
1512                vec![Index(index(4), None)],
1513                vec![
1514                    make_part(4, "four", 3072, 4096),
1515                    make_part(5, "five", 4096, 5120),
1516                    make_part(7, "seven", 5120, 6144),
1517                    make_part(8, "eight", 6144, 7168),
1518                    make_part(9, "nine", 7168, 8192),
1519                    make_part(10, "", 8192, 8193),
1520                    make_part(11, "", 8193, 8194),
1521                ],
1522                vec![
1523                    make_part(1, "boot", 1, 384),
1524                    make_part(2, "EFI-SYSTEM", 384, 512),
1525                    make_part(4, "root", 1024, 2200),
1526                    make_part(5, "four", 3072, 4096),
1527                    make_part(6, "five", 4096, 5120),
1528                    make_part(7, "seven", 5120, 6144),
1529                    make_part(8, "eight", 6144, 7168),
1530                    make_part(9, "nine", 7168, 8192),
1531                    make_part(10, "", 8192, 8193),
1532                    make_part(11, "", 8193, 8194),
1533                ],
1534            ),
1535            // No saved partitions
1536            (
1537                vec![Index(index(15), None)],
1538                vec![],
1539                merge_base_parts.clone(),
1540            ),
1541            // No filters
1542            (vec![], vec![], merge_base_parts.clone()),
1543        ];
1544
1545        let mut base = make_disk(512, &base_parts);
1546        let mut image = make_disk(512, &image_parts);
1547        for (testnum, (filter, expected_blank, expected_image)) in tests.iter().enumerate() {
1548            // try overwriting on blank disk
1549            let saved = SavedPartitions::new_from_file(&mut base, 512, filter).unwrap();
1550            let mut disk = make_unformatted_disk();
1551            saved.overwrite(&mut disk).unwrap();
1552            assert!(disk_has_mbr(&mut disk).unwrap(), "test {testnum}");
1553            let result = GPT::find_from(&mut disk).unwrap();
1554            assert_eq!(
1555                get_gpt_size(&mut disk).unwrap(),
1556                512 * result.header.first_usable_lba
1557            );
1558            assert_partitions_eq(expected_blank, &result, &format!("test {testnum} blank"));
1559
1560            // try merging with image disk onto merge_base disk
1561            let mut disk = make_disk(512, &merge_base_parts);
1562            saved.merge(&mut image, &mut disk).unwrap();
1563            assert!(
1564                disk_has_mbr(&mut disk).unwrap() != expected_blank.is_empty(),
1565                "test {testnum}"
1566            );
1567            let result = GPT::find_from(&mut disk).unwrap();
1568            assert_eq!(
1569                get_gpt_size(&mut disk).unwrap(),
1570                512 * result.header.first_usable_lba
1571            );
1572            assert_partitions_eq(expected_image, &result, &format!("test {testnum} image"));
1573            assert_eq!(
1574                saved.get_offset().unwrap(),
1575                match expected_blank.is_empty() {
1576                    true => None,
1577                    false => {
1578                        let (i, p) = &expected_blank[0];
1579                        Some((
1580                            p.starting_lba * 512,
1581                            format!("partition {} (\"{}\")", i, p.partition_name.as_str()),
1582                        ))
1583                    }
1584                },
1585                "test {testnum}"
1586            );
1587        }
1588
1589        // ensure overwrite clobbers every byte of MBR
1590        for sector_size in [512_usize, 4096_usize].iter() {
1591            let mut disk = make_unformatted_disk();
1592            disk.write_all(&vec![0xdau8; *sector_size]).unwrap();
1593            let saved =
1594                SavedPartitions::new_from_file(&mut disk, *sector_size as u64, &[]).unwrap();
1595            saved.overwrite(&mut disk).unwrap();
1596            assert!(disk_has_mbr(&mut disk).unwrap(), "{}", *sector_size);
1597            disk.rewind().unwrap();
1598            let mut buf = vec![0u8; *sector_size + 1];
1599            disk.read_exact(&mut buf).unwrap();
1600            assert_eq!(
1601                buf.iter().position(|v| *v == 0xda),
1602                None,
1603                "{}",
1604                *sector_size
1605            );
1606            // verify the first byte of the GPT magic number is intact
1607            assert_eq!(buf[*sector_size], 0x45u8, "{}", *sector_size);
1608        }
1609
1610        // test merging with unformatted initial disk
1611        let mut disk = make_unformatted_disk();
1612        let saved = SavedPartitions::new_from_file(&mut disk, 512, &[label("z")]).unwrap();
1613        let mut disk = make_disk(512, &merge_base_parts);
1614        saved.merge(&mut image, &mut disk).unwrap();
1615        let result = GPT::find_from(&mut disk).unwrap();
1616        assert_partitions_eq(&merge_base_parts, &result, "unformatted disk");
1617
1618        // test overlapping partitions
1619        let saved =
1620            SavedPartitions::new_from_file(&mut base, 512, &[Index(index(1), index(1))]).unwrap();
1621        let mut disk = make_disk(512, &merge_base_parts);
1622        let err = saved.merge(&mut image, &mut disk).unwrap_err();
1623        assert!(
1624            format!("{err:#}").contains(&gptman::Error::InvalidPartitionBoundaries.to_string()),
1625            "incorrect error: {err:#}"
1626        );
1627
1628        // test trying to save partitions from a MBR disk
1629        let mut disk = make_unformatted_disk();
1630        gptman::GPT::write_protective_mbr_into(&mut disk, 512).unwrap();
1631        // label only
1632        SavedPartitions::new(&mut disk, 512, &[label("*i*")]).unwrap();
1633        // index only
1634        assert_eq!(
1635            SavedPartitions::new(&mut disk, 512, &[Index(index(1), index(1))])
1636                .unwrap_err()
1637                .to_string(),
1638            "saving partitions from an MBR disk is not yet supported"
1639        );
1640        // label and index
1641        assert_eq!(
1642            SavedPartitions::new(&mut disk, 512, &[Index(index(1), index(1)), label("*i*")])
1643                .unwrap_err()
1644                .to_string(),
1645            "saving partitions from an MBR disk is not yet supported"
1646        );
1647
1648        // test sector size mismatch
1649        let saved = SavedPartitions::new_from_file(&mut base, 512, &[label("*i*")]).unwrap();
1650        let mut image_4096 = make_disk(4096, &image_parts);
1651        assert_eq!(
1652            get_gpt_size(&mut image_4096).unwrap(),
1653            4096 * GPT::find_from(&mut image_4096)
1654                .unwrap()
1655                .header
1656                .first_usable_lba
1657        );
1658        let mut disk = make_disk(4096, &merge_base_parts);
1659        assert_eq!(
1660            saved
1661                .merge(&mut image_4096, &mut disk)
1662                .unwrap_err()
1663                .to_string(),
1664            "GPT sector size 4096 doesn't match expected 512"
1665        );
1666
1667        // test copying invalid partitions
1668        let mut disk = make_unformatted_disk();
1669        let data = include_bytes!("../fixtures/gpt-512-duplicate-partition-guids.xz");
1670        copy(&mut XzDecoder::new(&data[..]), &mut disk).unwrap();
1671        assert_eq!(
1672            SavedPartitions::new_from_file(&mut disk, 512, &[label("*")])
1673                .unwrap_err()
1674                .to_string(),
1675            "failed dry run restoring saved partitions; input partition table may be invalid"
1676        );
1677
1678        // test corrupt input partition table
1679        for sector_size in &[512, 4096] {
1680            let sector_size: u64 = *sector_size;
1681            // backup corrupt
1682            let mut disk = make_damaged_disk(sector_size, &base_parts, false, true);
1683            let saved = SavedPartitions::new_from_file(&mut disk, sector_size, &[]).unwrap();
1684            assert!(!saved.is_saved());
1685            let saved =
1686                SavedPartitions::new_from_file(&mut disk, sector_size, &[label("one")]).unwrap();
1687            assert!(saved.is_saved());
1688            // primary corrupt
1689            let mut disk = make_damaged_disk(sector_size, &base_parts, true, false);
1690            let saved = SavedPartitions::new_from_file(&mut disk, sector_size, &[]).unwrap();
1691            assert!(!saved.is_saved());
1692            let saved =
1693                SavedPartitions::new_from_file(&mut disk, sector_size, &[label("one")]).unwrap();
1694            assert!(saved.is_saved());
1695            // both corrupt
1696            let mut disk = make_damaged_disk(sector_size, &base_parts, true, true);
1697            let saved = SavedPartitions::new_from_file(&mut disk, sector_size, &[]).unwrap();
1698            assert!(!saved.is_saved());
1699            let err = SavedPartitions::new_from_file(&mut disk, sector_size, &[label("one")])
1700                .unwrap_err();
1701            assert!(
1702                format!("{err:#}").contains("could not read primary header"),
1703                "incorrect error: {err:#}"
1704            );
1705        }
1706    }
1707
1708    // TODO: The partitions array assumes 512-byte sectors and we don't
1709    // scale the start/end values for 4096.  This doesn't matter right now
1710    // because the only use of 4096-byte sectors is in an error test.
1711    fn make_disk(sector_size: u64, partitions: &Vec<(u32, GPTPartitionEntry)>) -> File {
1712        let mut disk = make_unformatted_disk();
1713        // Make the disk just large enough for its partitions, then resize
1714        // it back up afterward.  This tests that we properly handle copying
1715        // saved partitions from the larger base disk into the smaller
1716        // install image.
1717        let len = if partitions.is_empty() {
1718            1024 * 1024
1719        } else {
1720            partitions[partitions.len() - 1].1.ending_lba * sector_size + 1024 * 1024
1721        };
1722        disk.set_len(len).unwrap();
1723        let mut gpt = GPT::new_from(&mut disk, sector_size, make_guid("disk")).unwrap();
1724        for (partnum, entry) in partitions {
1725            gpt[*partnum] = entry.clone();
1726        }
1727        gpt.write_into(&mut disk).unwrap();
1728        disk.set_len(10 * 1024 * 1024 * 1024).unwrap();
1729        disk
1730    }
1731
1732    fn make_unformatted_disk() -> File {
1733        let disk = tempfile().unwrap();
1734        disk.set_len(10 * 1024 * 1024 * 1024).unwrap();
1735        disk
1736    }
1737
1738    fn make_damaged_disk(
1739        sector_size: u64,
1740        partitions: &Vec<(u32, GPTPartitionEntry)>,
1741        damage_primary: bool,
1742        damage_backup: bool,
1743    ) -> File {
1744        let mut disk = make_unformatted_disk();
1745        // don't use make_disk() because it intentionally misaligns the
1746        // backup GPT
1747        let mut gpt = GPT::new_from(&mut disk, sector_size, make_guid("disk")).unwrap();
1748        for (partnum, entry) in partitions {
1749            gpt[*partnum] = entry.clone();
1750            gpt[*partnum].starting_lba /= sector_size / 512;
1751            gpt[*partnum].ending_lba /= sector_size / 512;
1752        }
1753        gpt.write_into(&mut disk).unwrap();
1754        if damage_primary {
1755            // write garbage to the HeaderCRC32
1756            disk.seek(SeekFrom::Start(gpt.header.primary_lba * sector_size + 16))
1757                .unwrap();
1758            disk.write_all(&[0x15, 0xcd, 0x5b, 0x07]).unwrap();
1759        }
1760        if damage_backup {
1761            // write garbage to the HeaderCRC32
1762            disk.seek(SeekFrom::Start(gpt.header.backup_lba * sector_size + 16))
1763                .unwrap();
1764            disk.write_all(&[0xb1, 0x68, 0xde, 0x3a]).unwrap();
1765        }
1766        disk
1767    }
1768
1769    fn make_guid(seed: &str) -> [u8; 16] {
1770        let mut guid = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
1771        for (i, b) in seed.as_bytes().iter().enumerate() {
1772            guid[i % guid.len()] ^= *b;
1773        }
1774        guid
1775    }
1776
1777    fn assert_partitions_eq(expected: &[(u32, GPTPartitionEntry)], found: &GPT, message: &str) {
1778        assert_eq!(
1779            expected
1780                .iter()
1781                .map(|(i, p)| (*i, p))
1782                .collect::<Vec<(u32, &GPTPartitionEntry)>>(),
1783            found
1784                .iter()
1785                .filter(|(_, p)| p.is_used())
1786                .collect::<Vec<(u32, &GPTPartitionEntry)>>(),
1787            "{message}"
1788        );
1789    }
1790}