Skip to main content

btrfs_uapi/
chunk.rs

1//! # Per-device physical allocation data from the chunk tree
2//!
3//! Walks the chunk tree to determine how many bytes each device has allocated,
4//! broken down by block-group profile flags.  This is the data source for the
5//! per-device breakdown in `btrfs filesystem usage`.
6//!
7//! Also exposes the full per-stripe chunk list used by `inspect-internal
8//! list-chunks`, including the bytes-used figure from the extent tree.
9//!
10//! Requires `CAP_SYS_ADMIN`.
11
12use crate::{
13    field_size,
14    raw::{
15        BTRFS_BLOCK_GROUP_ITEM_KEY, BTRFS_CHUNK_ITEM_KEY,
16        BTRFS_CHUNK_TREE_OBJECTID, BTRFS_EXTENT_TREE_OBJECTID,
17        BTRFS_FIRST_CHUNK_TREE_OBJECTID, btrfs_block_group_item, btrfs_chunk,
18        btrfs_stripe,
19    },
20    space::BlockGroupFlags,
21    tree_search::{SearchKey, tree_search},
22};
23use std::os::unix::io::BorrowedFd;
24
25/// Physical allocation of one block-group profile on one device, as read
26/// from the chunk tree.
27///
28/// `bytes` is the sum of `stripe_len` over all chunk stripes that land on
29/// `devid` and share the same `flags`.  This is the physical space the device
30/// contributes to that profile, not the logical (usable) space.
31#[derive(Debug, Clone, PartialEq, Eq)]
32pub struct DeviceAllocation {
33    /// btrfs device ID.
34    pub devid: u64,
35    /// Combined block-group type and profile flags, e.g.
36    /// `BlockGroupFlags::DATA | BlockGroupFlags::RAID1`.
37    pub flags: BlockGroupFlags,
38    /// Physical bytes allocated on this device for chunks with these flags.
39    pub bytes: u64,
40}
41
42const CHUNK_LENGTH_OFF: usize = std::mem::offset_of!(btrfs_chunk, length);
43const CHUNK_STRIPE_LEN_OFF: usize =
44    std::mem::offset_of!(btrfs_chunk, stripe_len);
45const CHUNK_TYPE_OFF: usize = std::mem::offset_of!(btrfs_chunk, type_);
46const CHUNK_NUM_STRIPES_OFF: usize =
47    std::mem::offset_of!(btrfs_chunk, num_stripes);
48const CHUNK_FIRST_STRIPE_OFF: usize = std::mem::offset_of!(btrfs_chunk, stripe);
49
50const STRIPE_SIZE: usize = std::mem::size_of::<btrfs_stripe>();
51const STRIPE_DEVID_OFF: usize = std::mem::offset_of!(btrfs_stripe, devid);
52const STRIPE_OFFSET_OFF: usize = std::mem::offset_of!(btrfs_stripe, offset);
53
54// Minimum item length: the btrfs_chunk struct with exactly one stripe.
55const CHUNK_MIN_LEN: usize = CHUNK_FIRST_STRIPE_OFF + STRIPE_SIZE; // 80
56
57/// One physical chunk stripe as seen in the chunk tree, with usage data from
58/// the extent tree.
59///
60/// For striped profiles (RAID0, RAID10, …) each logical chunk maps to
61/// multiple stripes on different devices; each stripe yields one `ChunkEntry`.
62/// For non-striped profiles (single, DUP) there is one `ChunkEntry` per chunk.
63#[derive(Debug, Clone, PartialEq, Eq)]
64pub struct ChunkEntry {
65    /// btrfs device ID that holds this stripe.
66    pub devid: u64,
67    /// Physical byte offset of this stripe on the device.
68    pub physical_start: u64,
69    /// Logical byte offset of the chunk within the filesystem address space.
70    pub logical_start: u64,
71    /// Logical length of the chunk in bytes (shared across all stripes of
72    /// the same chunk).
73    pub length: u64,
74    /// Combined block-group type and profile flags.
75    pub flags: BlockGroupFlags,
76    /// Bytes currently used within this chunk, as reported by the extent tree.
77    /// `0` if the block-group item could not be read.
78    pub used: u64,
79}
80
81/// Walk the chunk tree of the filesystem referred to by `fd` and return the
82/// physical allocation of each block-group profile on each device.
83///
84/// The result may contain multiple entries with the same `devid` when a
85/// device participates in chunks of different profiles (e.g. both
86/// `DATA|SINGLE` and `METADATA|DUP`).  Entries with the same `(devid, flags)`
87/// pair are merged — there will be at most one entry per unique pair.
88///
89/// Internally, each `BTRFS_CHUNK_ITEM_KEY` payload is a packed `btrfs_chunk`
90/// struct followed by `num_stripes - 1` additional `btrfs_stripe` structs.
91/// The `stripe_len` field of each stripe is accumulated per `(devid, flags)`
92/// to produce the physical byte counts in the returned list.
93pub fn device_chunk_allocations(
94    fd: BorrowedFd,
95) -> nix::Result<Vec<DeviceAllocation>> {
96    let mut allocs: Vec<DeviceAllocation> = Vec::new();
97
98    tree_search(
99        fd,
100        SearchKey::for_type(
101            BTRFS_CHUNK_TREE_OBJECTID as u64,
102            BTRFS_CHUNK_ITEM_KEY as u32,
103        ),
104        |_hdr, data| {
105            if let Some((stripe_len, flags, stripes)) = parse_chunk(data) {
106                for devid in stripes {
107                    accumulate(&mut allocs, devid, flags, stripe_len);
108                }
109            }
110            Ok(())
111        },
112    )?;
113
114    Ok(allocs)
115}
116
117/// Walk the chunk tree and return one [`ChunkEntry`] per stripe, including
118/// bytes-used from the extent tree.
119///
120/// The returned list is in chunk-tree order (ascending logical offset); call
121/// sites are responsible for any further sorting.  For each logical chunk the
122/// `used` field is populated by a single extent-tree lookup; if that lookup
123/// fails the field is set to `0` rather than propagating an error.
124///
125/// Requires `CAP_SYS_ADMIN`.
126pub fn chunk_list(fd: BorrowedFd) -> nix::Result<Vec<ChunkEntry>> {
127    let mut entries: Vec<ChunkEntry> = Vec::new();
128
129    tree_search(
130        fd,
131        SearchKey::for_objectid_range(
132            BTRFS_CHUNK_TREE_OBJECTID as u64,
133            BTRFS_CHUNK_ITEM_KEY as u32,
134            BTRFS_FIRST_CHUNK_TREE_OBJECTID as u64,
135            BTRFS_FIRST_CHUNK_TREE_OBJECTID as u64,
136        ),
137        |hdr, data| {
138            if let Some(stripes) = parse_chunk_stripes(data) {
139                let logical_start = hdr.offset;
140                let length = read_le_u64(data, CHUNK_LENGTH_OFF);
141                let type_bits = read_le_u64(data, CHUNK_TYPE_OFF);
142                let flags = BlockGroupFlags::from_bits_truncate(type_bits);
143                let used = block_group_used(fd, logical_start).unwrap_or(0);
144                for (devid, physical_start) in stripes {
145                    entries.push(ChunkEntry {
146                        devid,
147                        physical_start,
148                        logical_start,
149                        length,
150                        flags,
151                        used,
152                    });
153                }
154            }
155            Ok(())
156        },
157    )?;
158
159    Ok(entries)
160}
161
162/// Look up the bytes-used counter for the block group at `logical_start` by
163/// searching for `BTRFS_BLOCK_GROUP_ITEM_KEY` in the extent tree.
164///
165/// Returns `None` if the block group item is not found or cannot be read.
166fn block_group_used(fd: BorrowedFd, logical_start: u64) -> Option<u64> {
167    let mut used: Option<u64> = None;
168    tree_search(
169        fd,
170        SearchKey {
171            tree_id: BTRFS_EXTENT_TREE_OBJECTID as u64,
172            min_objectid: logical_start,
173            max_objectid: logical_start,
174            min_type: BTRFS_BLOCK_GROUP_ITEM_KEY,
175            max_type: BTRFS_BLOCK_GROUP_ITEM_KEY,
176            min_offset: 0,
177            max_offset: u64::MAX,
178            min_transid: 0,
179            max_transid: u64::MAX,
180        },
181        |_hdr, data| {
182            let used_off = std::mem::offset_of!(btrfs_block_group_item, used);
183            if data.len()
184                >= used_off + field_size!(btrfs_block_group_item, used)
185            {
186                used = Some(read_le_u64(data, used_off));
187            }
188            Ok(())
189        },
190    )
191    .ok()?;
192    used
193}
194
195/// Parse a raw chunk item payload.
196///
197/// Returns `(stripe_len, flags, devids)` on success, or `None` if the buffer
198/// is too small to be a valid chunk item.
199fn parse_chunk(
200    data: &[u8],
201) -> Option<(u64, BlockGroupFlags, impl Iterator<Item = u64> + '_)> {
202    if data.len() < CHUNK_MIN_LEN {
203        return None;
204    }
205
206    let stripe_len = read_le_u64(data, CHUNK_STRIPE_LEN_OFF);
207    let type_bits = read_le_u64(data, CHUNK_TYPE_OFF);
208    let num_stripes = read_le_u16(data, CHUNK_NUM_STRIPES_OFF) as usize;
209    let _length = read_le_u64(data, CHUNK_LENGTH_OFF);
210
211    // Sanity-check: the item must be large enough to hold all stripes.
212    let expected_len = CHUNK_FIRST_STRIPE_OFF + num_stripes * STRIPE_SIZE;
213    if data.len() < expected_len || num_stripes == 0 {
214        return None;
215    }
216
217    let flags = BlockGroupFlags::from_bits_truncate(type_bits);
218
219    let devids = (0..num_stripes).map(move |i| {
220        let stripe_off = CHUNK_FIRST_STRIPE_OFF + i * STRIPE_SIZE;
221        read_le_u64(data, stripe_off + STRIPE_DEVID_OFF)
222    });
223
224    Some((stripe_len, flags, devids))
225}
226
227/// Parse a raw chunk item payload and return an iterator of `(devid,
228/// physical_start)` pairs for each stripe.
229///
230/// Returns `None` if the buffer is too small to be a valid chunk item.
231fn parse_chunk_stripes(
232    data: &[u8],
233) -> Option<impl Iterator<Item = (u64, u64)> + '_> {
234    if data.len() < CHUNK_MIN_LEN {
235        return None;
236    }
237
238    let num_stripes = read_le_u16(data, CHUNK_NUM_STRIPES_OFF) as usize;
239    let expected_len = CHUNK_FIRST_STRIPE_OFF + num_stripes * STRIPE_SIZE;
240    if data.len() < expected_len || num_stripes == 0 {
241        return None;
242    }
243
244    let iter = (0..num_stripes).map(move |i| {
245        let stripe_off = CHUNK_FIRST_STRIPE_OFF + i * STRIPE_SIZE;
246        let devid = read_le_u64(data, stripe_off + STRIPE_DEVID_OFF);
247        let physical_start = read_le_u64(data, stripe_off + STRIPE_OFFSET_OFF);
248        (devid, physical_start)
249    });
250
251    Some(iter)
252}
253
254/// Add `stripe_len` bytes to the `(devid, flags)` entry, creating it if
255/// it does not yet exist.
256fn accumulate(
257    allocs: &mut Vec<DeviceAllocation>,
258    devid: u64,
259    flags: BlockGroupFlags,
260    bytes: u64,
261) {
262    if let Some(entry) = allocs
263        .iter_mut()
264        .find(|a| a.devid == devid && a.flags == flags)
265    {
266        entry.bytes += bytes;
267    } else {
268        allocs.push(DeviceAllocation {
269            devid,
270            flags,
271            bytes,
272        });
273    }
274}
275
/// Decode the little-endian `u64` stored in `buf[off..off + 8]`.
///
/// # Panics
///
/// Panics if `buf` does not contain eight bytes starting at `off`.
fn read_le_u64(buf: &[u8], off: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&buf[off..off + 8]);
    u64::from_le_bytes(raw)
}
279
/// Decode the little-endian `u16` stored in `buf[off..off + 2]`.
///
/// # Panics
///
/// Panics if `buf` does not contain two bytes starting at `off`.
fn read_le_u16(buf: &[u8], off: usize) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&buf[off..off + 2]);
    u16::from_le_bytes(raw)
}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a raw chunk item buffer for the parser tests.
    ///
    /// The buffer holds the fixed chunk header followed by one stripe record
    /// per element of `stripes` (`(devid, offset)` each).  `num_stripes` is
    /// written to the header independently of `stripes.len()` so that tests
    /// can craft items whose header disagrees with the real payload size.
    fn build_chunk_buf(
        length: u64,
        stripe_len: u64,
        type_bits: u64,
        num_stripes: u16,
        stripes: &[(u64, u64)],
    ) -> Vec<u8> {
        let total = CHUNK_FIRST_STRIPE_OFF + stripes.len() * STRIPE_SIZE;
        let mut buf = vec![0u8; total];
        buf[CHUNK_LENGTH_OFF..CHUNK_LENGTH_OFF + 8]
            .copy_from_slice(&length.to_le_bytes());
        buf[CHUNK_STRIPE_LEN_OFF..CHUNK_STRIPE_LEN_OFF + 8]
            .copy_from_slice(&stripe_len.to_le_bytes());
        buf[CHUNK_TYPE_OFF..CHUNK_TYPE_OFF + 8]
            .copy_from_slice(&type_bits.to_le_bytes());
        buf[CHUNK_NUM_STRIPES_OFF..CHUNK_NUM_STRIPES_OFF + 2]
            .copy_from_slice(&num_stripes.to_le_bytes());
        for (i, &(devid, offset)) in stripes.iter().enumerate() {
            let s = CHUNK_FIRST_STRIPE_OFF + i * STRIPE_SIZE;
            buf[s + STRIPE_DEVID_OFF..s + STRIPE_DEVID_OFF + 8]
                .copy_from_slice(&devid.to_le_bytes());
            buf[s + STRIPE_OFFSET_OFF..s + STRIPE_OFFSET_OFF + 8]
                .copy_from_slice(&offset.to_le_bytes());
        }
        buf
    }

    // --- read_le_u64 / read_le_u16 ---

    #[test]
    fn read_le_u64_basic() {
        let buf = 0x0102030405060708u64.to_le_bytes();
        assert_eq!(read_le_u64(&buf, 0), 0x0102030405060708);
    }

    #[test]
    fn read_le_u64_at_offset() {
        let buf: [u8; 11] = [0xAA, 0xBB, 1, 0, 0, 0, 0, 0, 0, 0x80, 0xCC];
        assert_eq!(read_le_u64(&buf, 2), 0x8000_0000_0000_0001);
    }

    #[test]
    fn read_le_u16_basic() {
        let buf = 0x0102u16.to_le_bytes();
        assert_eq!(read_le_u16(&buf, 0), 0x0102);
    }

    #[test]
    fn read_le_u16_at_offset() {
        let buf: [u8; 4] = [0xFF, 0x34, 0x12, 0xEE];
        assert_eq!(read_le_u16(&buf, 1), 0x1234);
    }

    // --- parse_chunk ---

    #[test]
    fn parse_chunk_single_stripe() {
        let data_flags = BlockGroupFlags::DATA.bits();
        let buf = build_chunk_buf(1024 * 1024, 65536, data_flags, 1, &[(1, 0)]);
        let (stripe_len, flags, devids) = parse_chunk(&buf).unwrap();
        assert_eq!(stripe_len, 65536);
        assert_eq!(flags, BlockGroupFlags::DATA);
        let devids: Vec<u64> = devids.collect();
        assert_eq!(devids, vec![1]);
    }

    #[test]
    fn parse_chunk_two_stripes() {
        let flags_bits =
            (BlockGroupFlags::DATA | BlockGroupFlags::RAID1).bits();
        let buf = build_chunk_buf(
            1 << 30,
            1 << 30,
            flags_bits,
            2,
            &[(1, 0), (2, 4096)],
        );
        let (_, flags, devids) = parse_chunk(&buf).unwrap();
        assert_eq!(flags, BlockGroupFlags::DATA | BlockGroupFlags::RAID1);
        let devids: Vec<u64> = devids.collect();
        assert_eq!(devids, vec![1, 2]);
    }

    #[test]
    fn parse_chunk_ignores_bytes_past_claimed_stripes() {
        // Room for two stripes, but the header only claims one.
        let buf = build_chunk_buf(1024, 1024, 0, 1, &[(5, 0), (9, 4096)]);
        let (_, _, devids) = parse_chunk(&buf).unwrap();
        let devids: Vec<u64> = devids.collect();
        assert_eq!(devids, vec![5]);
    }

    #[test]
    fn parse_chunk_too_short() {
        let buf = vec![0u8; CHUNK_MIN_LEN - 1];
        assert!(parse_chunk(&buf).is_none());
    }

    #[test]
    fn parse_chunk_zero_stripes() {
        // The buffer is long enough for one stripe, so only the explicit
        // `num_stripes == 0` rejection can make this fail.
        let buf = build_chunk_buf(1024, 1024, 0, 0, &[(1, 0)]);
        assert_eq!(buf.len(), CHUNK_MIN_LEN);
        assert!(parse_chunk(&buf).is_none());
    }

    #[test]
    fn parse_chunk_claims_more_stripes_than_fit() {
        // num_stripes says 5 but the buffer only has room for 1.
        let buf = build_chunk_buf(1024, 1024, 0, 5, &[(1, 0)]);
        assert!(parse_chunk(&buf).is_none());
    }

    // --- parse_chunk_stripes ---

    #[test]
    fn parse_chunk_stripes_returns_devid_and_offset() {
        let buf =
            build_chunk_buf(1 << 20, 1 << 20, 0, 2, &[(3, 8192), (7, 16384)]);
        let stripes: Vec<(u64, u64)> =
            parse_chunk_stripes(&buf).unwrap().collect();
        assert_eq!(stripes, vec![(3, 8192), (7, 16384)]);
    }

    #[test]
    fn parse_chunk_stripes_too_short() {
        let buf = vec![0u8; 10];
        assert!(parse_chunk_stripes(&buf).is_none());
    }

    #[test]
    fn parse_chunk_stripes_zero_stripes() {
        let buf = build_chunk_buf(1024, 1024, 0, 0, &[(1, 0)]);
        assert!(parse_chunk_stripes(&buf).is_none());
    }

    #[test]
    fn parse_chunk_stripes_claims_more_stripes_than_fit() {
        let buf = build_chunk_buf(1024, 1024, 0, 3, &[(1, 0), (2, 4096)]);
        assert!(parse_chunk_stripes(&buf).is_none());
    }

    // --- accumulate ---

    #[test]
    fn accumulate_new_entry() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        assert_eq!(
            allocs,
            vec![DeviceAllocation {
                devid: 1,
                flags: BlockGroupFlags::DATA,
                bytes: 1000,
            }]
        );
    }

    #[test]
    fn accumulate_merge_same_devid_flags() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 2000);
        assert_eq!(allocs.len(), 1);
        assert_eq!(allocs[0].bytes, 3000);
    }

    #[test]
    fn accumulate_separate_different_flags() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut allocs, 1, BlockGroupFlags::METADATA, 2000);
        assert_eq!(allocs.len(), 2);
        assert_eq!(allocs[0].bytes, 1000);
        assert_eq!(allocs[1].bytes, 2000);
    }

    #[test]
    fn accumulate_separate_different_devids() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut allocs, 2, BlockGroupFlags::DATA, 2000);
        assert_eq!(allocs.len(), 2);
        assert_eq!(allocs[0].devid, 1);
        assert_eq!(allocs[1].devid, 2);
    }
}