Skip to main content

btrfs_uapi/
chunk.rs

//! # Per-device physical allocation data from the chunk tree
//!
//! Walks the chunk tree to determine how many bytes each device has allocated,
//! broken down by block-group profile flags.  This is the data source for the
//! per-device breakdown in `btrfs filesystem usage`.
//!
//! Also exposes the full per-stripe chunk list used by `inspect-internal
//! list-chunks`, including the bytes-used figure from the extent tree.
//!
//! Requires `CAP_SYS_ADMIN`.
11
12use crate::{
13    raw::{
14        BTRFS_BLOCK_GROUP_ITEM_KEY, BTRFS_CHUNK_ITEM_KEY,
15        BTRFS_CHUNK_TREE_OBJECTID, BTRFS_EXTENT_TREE_OBJECTID,
16        BTRFS_FIRST_CHUNK_TREE_OBJECTID,
17    },
18    space::BlockGroupFlags,
19    tree_search::{Key, SearchFilter, tree_search},
20};
21use btrfs_disk::items::ChunkItem;
22use std::os::unix::io::BorrowedFd;
23
24/// Physical allocation of one block-group profile on one device, as read
25/// from the chunk tree.
26///
27/// `bytes` is the sum of `stripe_len` over all chunk stripes that land on
28/// `devid` and share the same `flags`.  This is the physical space the device
29/// contributes to that profile, not the logical (usable) space.
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct DeviceAllocation {
32    /// btrfs device ID.
33    pub devid: u64,
34    /// Combined block-group type and profile flags, e.g.
35    /// `BlockGroupFlags::DATA | BlockGroupFlags::RAID1`.
36    pub flags: BlockGroupFlags,
37    /// Physical bytes allocated on this device for chunks with these flags.
38    pub bytes: u64,
39}
40
41/// One physical chunk stripe as seen in the chunk tree, with usage data from
42/// the extent tree.
43///
44/// For striped profiles (RAID0, RAID10, …) each logical chunk maps to
45/// multiple stripes on different devices; each stripe yields one `ChunkEntry`.
46/// For non-striped profiles (single, DUP) there is one `ChunkEntry` per chunk.
47#[derive(Debug, Clone, PartialEq, Eq)]
48pub struct ChunkEntry {
49    /// btrfs device ID that holds this stripe.
50    pub devid: u64,
51    /// Physical byte offset of this stripe on the device.
52    pub physical_start: u64,
53    /// Logical byte offset of the chunk within the filesystem address space.
54    pub logical_start: u64,
55    /// Logical length of the chunk in bytes (shared across all stripes of
56    /// the same chunk).
57    pub length: u64,
58    /// Combined block-group type and profile flags.
59    pub flags: BlockGroupFlags,
60    /// Bytes currently used within this chunk, as reported by the extent tree.
61    /// `0` if the block-group item could not be read.
62    pub used: u64,
63}
64
65/// Walk the chunk tree of the filesystem referred to by `fd` and return the
66/// physical allocation of each block-group profile on each device.
67///
68/// The result may contain multiple entries with the same `devid` when a
69/// device participates in chunks of different profiles (e.g. both
70/// `DATA|SINGLE` and `METADATA|DUP`).  Entries with the same `(devid, flags)`
71/// pair are merged — there will be at most one entry per unique pair.
72///
73/// Internally, each `BTRFS_CHUNK_ITEM_KEY` payload is a packed `btrfs_chunk`
74/// struct followed by `num_stripes - 1` additional `btrfs_stripe` structs.
75/// The `stripe_len` field of each stripe is accumulated per `(devid, flags)`
76/// to produce the physical byte counts in the returned list.
77///
78/// # Errors
79///
80/// Returns `Err` if the tree search ioctl fails.
81pub fn device_chunk_allocations(
82    fd: BorrowedFd,
83) -> nix::Result<Vec<DeviceAllocation>> {
84    let mut allocs: Vec<DeviceAllocation> = Vec::new();
85
86    tree_search(
87        fd,
88        SearchFilter::for_type(
89            u64::from(BTRFS_CHUNK_TREE_OBJECTID),
90            BTRFS_CHUNK_ITEM_KEY,
91        ),
92        |_hdr, data| {
93            if let Some((stripe_len, flags, stripes)) = parse_chunk(data) {
94                for devid in stripes {
95                    accumulate(&mut allocs, devid, flags, stripe_len);
96                }
97            }
98            Ok(())
99        },
100    )?;
101
102    Ok(allocs)
103}
104
105/// Walk the chunk tree and return one [`ChunkEntry`] per stripe, including
106/// bytes-used from the extent tree.
107///
108/// The returned list is in chunk-tree order (ascending logical offset); call
109/// sites are responsible for any further sorting.  For each logical chunk the
110/// `used` field is populated by a single extent-tree lookup; if that lookup
111/// fails the field is set to `0` rather than propagating an error.
112///
113/// Requires `CAP_SYS_ADMIN`.
114///
115/// # Errors
116///
117/// Returns `Err` if the tree search ioctl fails.
118pub fn chunk_list(fd: BorrowedFd) -> nix::Result<Vec<ChunkEntry>> {
119    let mut entries: Vec<ChunkEntry> = Vec::new();
120
121    tree_search(
122        fd,
123        SearchFilter::for_objectid_range(
124            u64::from(BTRFS_CHUNK_TREE_OBJECTID),
125            BTRFS_CHUNK_ITEM_KEY,
126            u64::from(BTRFS_FIRST_CHUNK_TREE_OBJECTID),
127            u64::from(BTRFS_FIRST_CHUNK_TREE_OBJECTID),
128        ),
129        |hdr, data| {
130            if let Some(chunk) = ChunkItem::parse(data) {
131                let logical_start = hdr.offset;
132                let flags = BlockGroupFlags::from_bits_truncate(
133                    chunk.chunk_type.bits(),
134                );
135                let used = block_group_used(fd, logical_start).unwrap_or(0);
136                for stripe in &chunk.stripes {
137                    entries.push(ChunkEntry {
138                        devid: stripe.devid,
139                        physical_start: stripe.offset,
140                        logical_start,
141                        length: chunk.length,
142                        flags,
143                        used,
144                    });
145                }
146            }
147            Ok(())
148        },
149    )?;
150
151    Ok(entries)
152}
153
154/// Look up the bytes-used counter for the block group at `logical_start` by
155/// searching for `BTRFS_BLOCK_GROUP_ITEM_KEY` in the extent tree.
156///
157/// Returns `None` if the block group item is not found or cannot be read.
158fn block_group_used(fd: BorrowedFd, logical_start: u64) -> Option<u64> {
159    let mut used: Option<u64> = None;
160    tree_search(
161        fd,
162        SearchFilter {
163            tree_id: u64::from(BTRFS_EXTENT_TREE_OBJECTID),
164            start: Key {
165                objectid: logical_start,
166                item_type: BTRFS_BLOCK_GROUP_ITEM_KEY,
167                offset: 0,
168            },
169            end: Key {
170                objectid: logical_start,
171                item_type: BTRFS_BLOCK_GROUP_ITEM_KEY,
172                offset: u64::MAX,
173            },
174            min_transid: 0,
175            max_transid: u64::MAX,
176        },
177        |_hdr, data| {
178            if let Some(bg) = btrfs_disk::items::BlockGroupItem::parse(data) {
179                used = Some(bg.used);
180            }
181            Ok(())
182        },
183    )
184    .ok()?;
185    used
186}
187
188/// Parse a raw chunk item payload.
189///
190/// Returns `(stripe_len, flags, devids)` on success, or `None` if the buffer
191/// is too small to be a valid chunk item.
192fn parse_chunk(data: &[u8]) -> Option<(u64, BlockGroupFlags, Vec<u64>)> {
193    let chunk = ChunkItem::parse(data)?;
194    let flags = BlockGroupFlags::from_bits_truncate(chunk.chunk_type.bits());
195    let devids: Vec<u64> = chunk.stripes.iter().map(|s| s.devid).collect();
196    Some((chunk.stripe_len, flags, devids))
197}
198
199/// Add `stripe_len` bytes to the `(devid, flags)` entry, creating it if
200/// it does not yet exist.
201fn accumulate(
202    allocs: &mut Vec<DeviceAllocation>,
203    devid: u64,
204    flags: BlockGroupFlags,
205    bytes: u64,
206) {
207    if let Some(entry) = allocs
208        .iter_mut()
209        .find(|a| a.devid == devid && a.flags == flags)
210    {
211        entry.bytes += bytes;
212    } else {
213        allocs.push(DeviceAllocation {
214            devid,
215            flags,
216            bytes,
217        });
218    }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialise a chunk item the way `ChunkItem::parse` reads it: all
    /// fields little-endian and back to back, a fixed header followed by one
    /// record per entry of `stripes`.
    ///
    /// `num_stripes` is written to the header verbatim and is deliberately
    /// independent of `stripes.len()`, so tests can build inconsistent items.
    fn build_chunk_buf(
        length: u64,
        stripe_len: u64,
        type_bits: u64,
        num_stripes: u16,
        stripes: &[(u64, u64)], // (devid, offset) per stripe
    ) -> Vec<u8> {
        let mut raw = Vec::new();

        // length, owner, stripe_len, chunk_type
        for field in [length, 0, stripe_len, type_bits] {
            raw.extend_from_slice(&field.to_le_bytes());
        }
        // io_align, io_width, sector_size
        for field in [4096u32; 3] {
            raw.extend_from_slice(&field.to_le_bytes());
        }
        // num_stripes, sub_stripes
        for field in [num_stripes, 0] {
            raw.extend_from_slice(&field.to_le_bytes());
        }
        // Stripe records: devid, offset, dev_uuid.
        for (devid, offset) in stripes.iter().copied() {
            raw.extend_from_slice(&devid.to_le_bytes());
            raw.extend_from_slice(&offset.to_le_bytes());
            raw.extend_from_slice(&[0u8; 16]);
        }

        raw
    }

    // --- parse_chunk ---

    #[test]
    fn parse_chunk_single_stripe() {
        let raw = build_chunk_buf(
            1 << 20,
            1 << 16,
            BlockGroupFlags::DATA.bits(),
            1,
            &[(1, 0)],
        );
        let parsed = parse_chunk(&raw).unwrap();
        assert_eq!(parsed, (65536, BlockGroupFlags::DATA, vec![1]));
    }

    #[test]
    fn parse_chunk_two_stripes() {
        let expected = BlockGroupFlags::DATA | BlockGroupFlags::RAID1;
        let raw = build_chunk_buf(
            1 << 30,
            1 << 30,
            expected.bits(),
            2,
            &[(1, 0), (2, 4096)],
        );
        let (_, flags, devids) = parse_chunk(&raw).unwrap();
        assert_eq!(flags, expected);
        assert_eq!(devids, vec![1, 2]);
    }

    #[test]
    fn parse_chunk_too_short() {
        let raw = [0u8; 10];
        assert!(parse_chunk(&raw).is_none());
    }

    #[test]
    fn parse_chunk_claims_more_stripes_than_fit() {
        // The header announces 5 stripes, yet only one stripe record follows.
        let raw = build_chunk_buf(1024, 1024, 0, 5, &[(1, 0)]);
        // The parser is expected to keep only the stripes that are present.
        let (_, _, devids) = parse_chunk(&raw)
            .expect("truncated stripe array should still parse");
        assert_eq!(devids.len(), 1);
    }

    // --- accumulate ---

    #[test]
    fn accumulate_new_entry() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        assert_eq!(allocs.len(), 1);
        let entry = &allocs[0];
        assert_eq!((entry.devid, entry.bytes), (1, 1000));
    }

    #[test]
    fn accumulate_merge_same_devid_flags() {
        let mut allocs = Vec::new();
        for amount in [1000, 2000] {
            accumulate(&mut allocs, 1, BlockGroupFlags::DATA, amount);
        }
        assert_eq!(allocs.len(), 1);
        assert_eq!(allocs[0].bytes, 3000);
    }

    #[test]
    fn accumulate_separate_different_flags() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut allocs, 1, BlockGroupFlags::METADATA, 2000);
        assert_eq!(allocs.len(), 2);
    }

    #[test]
    fn accumulate_separate_different_devids() {
        let mut allocs = Vec::new();
        for (devid, amount) in [(1, 1000), (2, 2000)] {
            accumulate(&mut allocs, devid, BlockGroupFlags::DATA, amount);
        }
        assert_eq!(allocs.len(), 2);
    }
}