Skip to main content

btrfs_uapi/
chunk.rs

1//! # Per-device physical allocation data from the chunk tree
2//!
3//! Walks the chunk tree to determine how many bytes each device has allocated,
4//! broken down by block-group profile flags.  This is the data source for the
5//! per-device breakdown in `btrfs filesystem usage`.
6//!
7//! Also exposes the full per-stripe chunk list used by `inspect-internal
8//! list-chunks`, including the bytes-used figure from the extent tree.
9//!
10//! Requires `CAP_SYS_ADMIN`.
11
12use crate::{
13    raw::{
14        BTRFS_BLOCK_GROUP_ITEM_KEY, BTRFS_CHUNK_ITEM_KEY,
15        BTRFS_CHUNK_TREE_OBJECTID, BTRFS_EXTENT_TREE_OBJECTID,
16        BTRFS_FIRST_CHUNK_TREE_OBJECTID,
17    },
18    space::BlockGroupFlags,
19    tree_search::{SearchKey, tree_search},
20};
21use btrfs_disk::items::ChunkItem;
22use std::os::unix::io::BorrowedFd;
23
24/// Physical allocation of one block-group profile on one device, as read
25/// from the chunk tree.
26///
27/// `bytes` is the sum of `stripe_len` over all chunk stripes that land on
28/// `devid` and share the same `flags`.  This is the physical space the device
29/// contributes to that profile, not the logical (usable) space.
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct DeviceAllocation {
32    /// btrfs device ID.
33    pub devid: u64,
34    /// Combined block-group type and profile flags, e.g.
35    /// `BlockGroupFlags::DATA | BlockGroupFlags::RAID1`.
36    pub flags: BlockGroupFlags,
37    /// Physical bytes allocated on this device for chunks with these flags.
38    pub bytes: u64,
39}
40
41/// One physical chunk stripe as seen in the chunk tree, with usage data from
42/// the extent tree.
43///
44/// For striped profiles (RAID0, RAID10, …) each logical chunk maps to
45/// multiple stripes on different devices; each stripe yields one `ChunkEntry`.
46/// For non-striped profiles (single, DUP) there is one `ChunkEntry` per chunk.
47#[derive(Debug, Clone, PartialEq, Eq)]
48pub struct ChunkEntry {
49    /// btrfs device ID that holds this stripe.
50    pub devid: u64,
51    /// Physical byte offset of this stripe on the device.
52    pub physical_start: u64,
53    /// Logical byte offset of the chunk within the filesystem address space.
54    pub logical_start: u64,
55    /// Logical length of the chunk in bytes (shared across all stripes of
56    /// the same chunk).
57    pub length: u64,
58    /// Combined block-group type and profile flags.
59    pub flags: BlockGroupFlags,
60    /// Bytes currently used within this chunk, as reported by the extent tree.
61    /// `0` if the block-group item could not be read.
62    pub used: u64,
63}
64
65/// Walk the chunk tree of the filesystem referred to by `fd` and return the
66/// physical allocation of each block-group profile on each device.
67///
68/// The result may contain multiple entries with the same `devid` when a
69/// device participates in chunks of different profiles (e.g. both
70/// `DATA|SINGLE` and `METADATA|DUP`).  Entries with the same `(devid, flags)`
71/// pair are merged — there will be at most one entry per unique pair.
72///
73/// Internally, each `BTRFS_CHUNK_ITEM_KEY` payload is a packed `btrfs_chunk`
74/// struct followed by `num_stripes - 1` additional `btrfs_stripe` structs.
75/// The `stripe_len` field of each stripe is accumulated per `(devid, flags)`
76/// to produce the physical byte counts in the returned list.
77///
78/// # Errors
79///
80/// Returns `Err` if the tree search ioctl fails.
81pub fn device_chunk_allocations(
82    fd: BorrowedFd,
83) -> nix::Result<Vec<DeviceAllocation>> {
84    let mut allocs: Vec<DeviceAllocation> = Vec::new();
85
86    tree_search(
87        fd,
88        SearchKey::for_type(
89            u64::from(BTRFS_CHUNK_TREE_OBJECTID),
90            BTRFS_CHUNK_ITEM_KEY,
91        ),
92        |_hdr, data| {
93            if let Some((stripe_len, flags, stripes)) = parse_chunk(data) {
94                for devid in stripes {
95                    accumulate(&mut allocs, devid, flags, stripe_len);
96                }
97            }
98            Ok(())
99        },
100    )?;
101
102    Ok(allocs)
103}
104
105/// Walk the chunk tree and return one [`ChunkEntry`] per stripe, including
106/// bytes-used from the extent tree.
107///
108/// The returned list is in chunk-tree order (ascending logical offset); call
109/// sites are responsible for any further sorting.  For each logical chunk the
110/// `used` field is populated by a single extent-tree lookup; if that lookup
111/// fails the field is set to `0` rather than propagating an error.
112///
113/// Requires `CAP_SYS_ADMIN`.
114///
115/// # Errors
116///
117/// Returns `Err` if the tree search ioctl fails.
118pub fn chunk_list(fd: BorrowedFd) -> nix::Result<Vec<ChunkEntry>> {
119    let mut entries: Vec<ChunkEntry> = Vec::new();
120
121    tree_search(
122        fd,
123        SearchKey::for_objectid_range(
124            u64::from(BTRFS_CHUNK_TREE_OBJECTID),
125            BTRFS_CHUNK_ITEM_KEY,
126            u64::from(BTRFS_FIRST_CHUNK_TREE_OBJECTID),
127            u64::from(BTRFS_FIRST_CHUNK_TREE_OBJECTID),
128        ),
129        |hdr, data| {
130            if let Some(chunk) = ChunkItem::parse(data) {
131                let logical_start = hdr.offset;
132                let flags = BlockGroupFlags::from_bits_truncate(
133                    chunk.chunk_type.bits(),
134                );
135                let used = block_group_used(fd, logical_start).unwrap_or(0);
136                for stripe in &chunk.stripes {
137                    entries.push(ChunkEntry {
138                        devid: stripe.devid,
139                        physical_start: stripe.offset,
140                        logical_start,
141                        length: chunk.length,
142                        flags,
143                        used,
144                    });
145                }
146            }
147            Ok(())
148        },
149    )?;
150
151    Ok(entries)
152}
153
154/// Look up the bytes-used counter for the block group at `logical_start` by
155/// searching for `BTRFS_BLOCK_GROUP_ITEM_KEY` in the extent tree.
156///
157/// Returns `None` if the block group item is not found or cannot be read.
158fn block_group_used(fd: BorrowedFd, logical_start: u64) -> Option<u64> {
159    let mut used: Option<u64> = None;
160    tree_search(
161        fd,
162        SearchKey {
163            tree_id: u64::from(BTRFS_EXTENT_TREE_OBJECTID),
164            min_objectid: logical_start,
165            max_objectid: logical_start,
166            min_type: BTRFS_BLOCK_GROUP_ITEM_KEY,
167            max_type: BTRFS_BLOCK_GROUP_ITEM_KEY,
168            min_offset: 0,
169            max_offset: u64::MAX,
170            min_transid: 0,
171            max_transid: u64::MAX,
172        },
173        |_hdr, data| {
174            if let Some(bg) = btrfs_disk::items::BlockGroupItem::parse(data) {
175                used = Some(bg.used);
176            }
177            Ok(())
178        },
179    )
180    .ok()?;
181    used
182}
183
184/// Parse a raw chunk item payload.
185///
186/// Returns `(stripe_len, flags, devids)` on success, or `None` if the buffer
187/// is too small to be a valid chunk item.
188fn parse_chunk(data: &[u8]) -> Option<(u64, BlockGroupFlags, Vec<u64>)> {
189    let chunk = ChunkItem::parse(data)?;
190    let flags = BlockGroupFlags::from_bits_truncate(chunk.chunk_type.bits());
191    let devids: Vec<u64> = chunk.stripes.iter().map(|s| s.devid).collect();
192    Some((chunk.stripe_len, flags, devids))
193}
194
195/// Add `stripe_len` bytes to the `(devid, flags)` entry, creating it if
196/// it does not yet exist.
197fn accumulate(
198    allocs: &mut Vec<DeviceAllocation>,
199    devid: u64,
200    flags: BlockGroupFlags,
201    bytes: u64,
202) {
203    if let Some(entry) = allocs
204        .iter_mut()
205        .find(|a| a.devid == devid && a.flags == flags)
206    {
207        entry.bytes += bytes;
208    } else {
209        allocs.push(DeviceAllocation {
210            devid,
211            flags,
212            bytes,
213        });
214    }
215}
216
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble a chunk item payload in the sequential little-endian layout
    /// that `ChunkItem::parse` reads: a fixed header followed by one record
    /// per entry in `stripes`.
    ///
    /// `num_stripes` is written verbatim and may disagree with
    /// `stripes.len()`, which lets tests build inconsistent items.
    fn build_chunk_buf(
        length: u64,
        stripe_len: u64,
        type_bits: u64,
        num_stripes: u16,
        stripes: &[(u64, u64)], // (devid, offset) per stripe
    ) -> Vec<u8> {
        const HEADER_BYTES: usize = 48;
        const STRIPE_BYTES: usize = 32;

        let mut buf =
            Vec::with_capacity(HEADER_BYTES + STRIPE_BYTES * stripes.len());

        // length, owner, stripe_len, chunk_type
        for field in [length, 0, stripe_len, type_bits] {
            buf.extend_from_slice(&field.to_le_bytes());
        }
        // io_align, io_width, sector_size
        for field in [4096u32; 3] {
            buf.extend_from_slice(&field.to_le_bytes());
        }
        // num_stripes, sub_stripes
        for field in [num_stripes, 0u16] {
            buf.extend_from_slice(&field.to_le_bytes());
        }
        // devid, offset, dev_uuid for every stripe
        for &(devid, offset) in stripes {
            buf.extend_from_slice(&devid.to_le_bytes());
            buf.extend_from_slice(&offset.to_le_bytes());
            buf.extend_from_slice(&[0u8; 16]);
        }
        buf
    }

    // --- parse_chunk ---

    #[test]
    fn parse_chunk_single_stripe() {
        let payload = build_chunk_buf(
            1024 * 1024,
            65536,
            BlockGroupFlags::DATA.bits(),
            1,
            &[(1, 0)],
        );
        let (stripe_len, flags, devids) = parse_chunk(&payload).unwrap();
        assert_eq!(stripe_len, 65536);
        assert_eq!(flags, BlockGroupFlags::DATA);
        assert_eq!(devids, vec![1]);
    }

    #[test]
    fn parse_chunk_two_stripes() {
        let expected = BlockGroupFlags::DATA | BlockGroupFlags::RAID1;
        let payload = build_chunk_buf(
            1 << 30,
            1 << 30,
            expected.bits(),
            2,
            &[(1, 0), (2, 4096)],
        );
        let (_, flags, devids) = parse_chunk(&payload).unwrap();
        assert_eq!(flags, BlockGroupFlags::DATA | BlockGroupFlags::RAID1);
        assert_eq!(devids, vec![1, 2]);
    }

    #[test]
    fn parse_chunk_too_short() {
        let payload = vec![0u8; 10];
        assert!(parse_chunk(&payload).is_none());
    }

    #[test]
    fn parse_chunk_claims_more_stripes_than_fit() {
        // The header announces 5 stripes, but only 1 is actually present.
        let payload = build_chunk_buf(1024, 1024, 0, 5, &[(1, 0)]);
        // ChunkItem::parse will parse only as many stripes as fit.
        let parsed = parse_chunk(&payload);
        assert!(parsed.is_some());
        let (_, _, devids) = parsed.unwrap();
        assert_eq!(devids.len(), 1);
    }

    // --- accumulate ---

    #[test]
    fn accumulate_new_entry() {
        let mut table = Vec::new();
        accumulate(&mut table, 1, BlockGroupFlags::DATA, 1000);
        assert_eq!(table.len(), 1);
        assert_eq!(table[0].devid, 1);
        assert_eq!(table[0].bytes, 1000);
    }

    #[test]
    fn accumulate_merge_same_devid_flags() {
        let mut table = Vec::new();
        for amount in [1000, 2000] {
            accumulate(&mut table, 1, BlockGroupFlags::DATA, amount);
        }
        assert_eq!(table.len(), 1);
        assert_eq!(table[0].bytes, 3000);
    }

    #[test]
    fn accumulate_separate_different_flags() {
        let mut table = Vec::new();
        accumulate(&mut table, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut table, 1, BlockGroupFlags::METADATA, 2000);
        assert_eq!(table.len(), 2);
    }

    #[test]
    fn accumulate_separate_different_devids() {
        let mut table = Vec::new();
        accumulate(&mut table, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut table, 2, BlockGroupFlags::DATA, 2000);
        assert_eq!(table.len(), 2);
    }
}