Skip to main content

btrfs_uapi/
dev_extent.rs

1//! # Device extent tree: per-device physical extent layout
2//!
3//! Walks the device tree (`BTRFS_DEV_TREE_OBJECTID`) to enumerate
4//! `BTRFS_DEV_EXTENT_KEY` items for a given device, then computes the
5//! minimum size to which the device can be shrunk.
6
7use crate::{
8    field_size,
9    raw::{BTRFS_DEV_EXTENT_KEY, BTRFS_DEV_TREE_OBJECTID, btrfs_dev_extent},
10    tree_search::{SearchKey, tree_search},
11};
12use std::os::unix::io::BorrowedFd;
13
14const DEV_EXTENT_LENGTH_OFF: usize = std::mem::offset_of!(btrfs_dev_extent, length);
15
16const SZ_1M: u64 = 1024 * 1024;
17const SZ_32M: u64 = 32 * 1024 * 1024;
18
19/// Number of superblock mirror copies btrfs maintains.
20const BTRFS_SUPER_MIRROR_MAX: usize = 3;
21
22/// Return the byte offset of superblock mirror `i`.
23///
24/// Mirror 0 is at 64 KiB, mirror 1 at 64 MiB, mirror 2 at 256 GiB.
25fn sb_offset(i: usize) -> u64 {
26    match i {
27        0 => 64 * 1024,
28        _ => 1u64 << (20 + 10 * (i as u64)),
29    }
30}
31
32/// A contiguous physical byte range on a device (inclusive end).
33#[derive(Debug, Clone, Copy)]
34struct Extent {
35    start: u64,
36    /// Inclusive end byte.
37    end: u64,
38}
39
40/// Compute the minimum size to which device `devid` can be shrunk.
41///
42/// Walks the device tree for all `DEV_EXTENT_KEY` items belonging to
43/// `devid`, sums their lengths, then adjusts for extents that sit beyond
44/// the sum by checking whether they can be relocated into holes closer to
45/// the start of the device. The algorithm matches `btrfs inspect-internal
46/// min-dev-size` from btrfs-progs.
47///
48/// Requires `CAP_SYS_ADMIN`.
49pub fn min_dev_size(fd: BorrowedFd, devid: u64) -> nix::Result<u64> {
50    let mut min_size: u64 = SZ_1M;
51    let mut extents: Vec<Extent> = Vec::new();
52    let mut holes: Vec<Extent> = Vec::new();
53    let mut last_pos: Option<u64> = None;
54
55    tree_search(
56        fd,
57        SearchKey::for_objectid_range(
58            BTRFS_DEV_TREE_OBJECTID as u64,
59            BTRFS_DEV_EXTENT_KEY,
60            devid,
61            devid,
62        ),
63        |hdr, data| {
64            if data.len() < DEV_EXTENT_LENGTH_OFF + field_size!(btrfs_dev_extent, length) {
65                return Ok(());
66            }
67            let phys_start = hdr.offset;
68            let len = read_le_u64(data, DEV_EXTENT_LENGTH_OFF);
69
70            min_size += len;
71
72            // Extents are prepended (descending end offset) so that the
73            // adjustment pass processes the highest-addressed extent first.
74            extents.push(Extent {
75                start: phys_start,
76                end: phys_start + len - 1,
77            });
78
79            if let Some(prev_end) = last_pos {
80                if prev_end != phys_start {
81                    holes.push(Extent {
82                        start: prev_end,
83                        end: phys_start - 1,
84                    });
85                }
86            }
87
88            last_pos = Some(phys_start + len);
89            Ok(())
90        },
91    )?;
92
93    // Sort extents by descending end offset for the adjustment pass.
94    extents.sort_by(|a, b| b.end.cmp(&a.end));
95
96    adjust_min_size(&mut extents, &mut holes, &mut min_size);
97
98    Ok(min_size)
99}
100
101/// Check whether a byte range `[start, end]` contains a superblock mirror.
102fn hole_includes_sb_mirror(start: u64, end: u64) -> bool {
103    (0..BTRFS_SUPER_MIRROR_MAX).any(|i| {
104        let bytenr = sb_offset(i);
105        bytenr >= start && bytenr <= end
106    })
107}
108
109/// Adjust `min_size` downward by relocating tail extents into holes.
110///
111/// Processes extents in descending order of end offset. If an extent sits
112/// beyond the current `min_size`, try to find a hole large enough to
113/// relocate it. If no hole fits, the device cannot be shrunk past that
114/// extent and `min_size` is set to its end + 1.
115///
116/// Adds scratch space (largest relocated extent + 32 MiB for a potential
117/// system chunk allocation) when any relocation is needed.
118fn adjust_min_size(extents: &mut Vec<Extent>, holes: &mut Vec<Extent>, min_size: &mut u64) {
119    let mut scratch_space: u64 = 0;
120
121    while let Some(&ext) = extents.first() {
122        if ext.end < *min_size {
123            break;
124        }
125
126        let extent_len = ext.end - ext.start + 1;
127
128        // Find the first hole large enough to hold this extent.
129        let hole_idx = holes.iter().position(|h| {
130            let hole_len = h.end - h.start + 1;
131            hole_len >= extent_len
132        });
133
134        let Some(idx) = hole_idx else {
135            *min_size = ext.end + 1;
136            break;
137        };
138
139        // If the target hole contains a superblock mirror location,
140        // pessimistically assume we need one more extent worth of space.
141        if hole_includes_sb_mirror(holes[idx].start, holes[idx].start + extent_len - 1) {
142            *min_size += extent_len;
143        }
144
145        // Shrink or remove the hole.
146        let hole_len = holes[idx].end - holes[idx].start + 1;
147        if hole_len > extent_len {
148            holes[idx].start += extent_len;
149        } else {
150            holes.remove(idx);
151        }
152
153        extents.remove(0);
154
155        if extent_len > scratch_space {
156            scratch_space = extent_len;
157        }
158    }
159
160    if scratch_space > 0 {
161        *min_size += scratch_space;
162        // Chunk allocation may require a new system chunk (up to 32 MiB).
163        *min_size += SZ_32M;
164    }
165}
166
167fn read_le_u64(buf: &[u8], off: usize) -> u64 {
168    u64::from_le_bytes(buf[off..off + 8].try_into().unwrap())
169}