Skip to main content

btrfs_uapi/
chunk.rs

1//! # Per-device physical allocation data from the chunk tree
2//!
3//! Walks the chunk tree to determine how many bytes each device has allocated,
4//! broken down by block-group profile flags.  This is the data source for the
5//! per-device breakdown in `btrfs filesystem usage`.
6//!
7//! Also exposes the full per-stripe chunk list used by `inspect-internal
8//! list-chunks`, including the bytes-used figure from the extent tree.
9//!
10//! Requires `CAP_SYS_ADMIN`.
11
12use crate::{
13    field_size,
14    raw::{
15        BTRFS_BLOCK_GROUP_ITEM_KEY, BTRFS_CHUNK_ITEM_KEY,
16        BTRFS_CHUNK_TREE_OBJECTID, BTRFS_EXTENT_TREE_OBJECTID,
17        BTRFS_FIRST_CHUNK_TREE_OBJECTID, btrfs_block_group_item, btrfs_chunk,
18        btrfs_stripe,
19    },
20    space::BlockGroupFlags,
21    tree_search::{SearchKey, tree_search},
22    util::{read_le_u16, read_le_u64},
23};
24use std::os::unix::io::BorrowedFd;
25
26/// Physical allocation of one block-group profile on one device, as read
27/// from the chunk tree.
28///
29/// `bytes` is the sum of `stripe_len` over all chunk stripes that land on
30/// `devid` and share the same `flags`.  This is the physical space the device
31/// contributes to that profile, not the logical (usable) space.
32#[derive(Debug, Clone, PartialEq, Eq)]
33pub struct DeviceAllocation {
34    /// btrfs device ID.
35    pub devid: u64,
36    /// Combined block-group type and profile flags, e.g.
37    /// `BlockGroupFlags::DATA | BlockGroupFlags::RAID1`.
38    pub flags: BlockGroupFlags,
39    /// Physical bytes allocated on this device for chunks with these flags.
40    pub bytes: u64,
41}
42
43const CHUNK_LENGTH_OFF: usize = std::mem::offset_of!(btrfs_chunk, length);
44const CHUNK_STRIPE_LEN_OFF: usize =
45    std::mem::offset_of!(btrfs_chunk, stripe_len);
46const CHUNK_TYPE_OFF: usize = std::mem::offset_of!(btrfs_chunk, type_);
47const CHUNK_NUM_STRIPES_OFF: usize =
48    std::mem::offset_of!(btrfs_chunk, num_stripes);
49const CHUNK_FIRST_STRIPE_OFF: usize = std::mem::offset_of!(btrfs_chunk, stripe);
50
51const STRIPE_SIZE: usize = std::mem::size_of::<btrfs_stripe>();
52const STRIPE_DEVID_OFF: usize = std::mem::offset_of!(btrfs_stripe, devid);
53const STRIPE_OFFSET_OFF: usize = std::mem::offset_of!(btrfs_stripe, offset);
54
55// Minimum item length: the btrfs_chunk struct with exactly one stripe.
56const CHUNK_MIN_LEN: usize = CHUNK_FIRST_STRIPE_OFF + STRIPE_SIZE; // 80
57
58/// One physical chunk stripe as seen in the chunk tree, with usage data from
59/// the extent tree.
60///
61/// For striped profiles (RAID0, RAID10, …) each logical chunk maps to
62/// multiple stripes on different devices; each stripe yields one `ChunkEntry`.
63/// For non-striped profiles (single, DUP) there is one `ChunkEntry` per chunk.
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct ChunkEntry {
66    /// btrfs device ID that holds this stripe.
67    pub devid: u64,
68    /// Physical byte offset of this stripe on the device.
69    pub physical_start: u64,
70    /// Logical byte offset of the chunk within the filesystem address space.
71    pub logical_start: u64,
72    /// Logical length of the chunk in bytes (shared across all stripes of
73    /// the same chunk).
74    pub length: u64,
75    /// Combined block-group type and profile flags.
76    pub flags: BlockGroupFlags,
77    /// Bytes currently used within this chunk, as reported by the extent tree.
78    /// `0` if the block-group item could not be read.
79    pub used: u64,
80}
81
82/// Walk the chunk tree of the filesystem referred to by `fd` and return the
83/// physical allocation of each block-group profile on each device.
84///
85/// The result may contain multiple entries with the same `devid` when a
86/// device participates in chunks of different profiles (e.g. both
87/// `DATA|SINGLE` and `METADATA|DUP`).  Entries with the same `(devid, flags)`
88/// pair are merged — there will be at most one entry per unique pair.
89///
90/// Internally, each `BTRFS_CHUNK_ITEM_KEY` payload is a packed `btrfs_chunk`
91/// struct followed by `num_stripes - 1` additional `btrfs_stripe` structs.
92/// The `stripe_len` field of each stripe is accumulated per `(devid, flags)`
93/// to produce the physical byte counts in the returned list.
94pub fn device_chunk_allocations(
95    fd: BorrowedFd,
96) -> nix::Result<Vec<DeviceAllocation>> {
97    let mut allocs: Vec<DeviceAllocation> = Vec::new();
98
99    tree_search(
100        fd,
101        SearchKey::for_type(
102            BTRFS_CHUNK_TREE_OBJECTID as u64,
103            BTRFS_CHUNK_ITEM_KEY,
104        ),
105        |_hdr, data| {
106            if let Some((stripe_len, flags, stripes)) = parse_chunk(data) {
107                for devid in stripes {
108                    accumulate(&mut allocs, devid, flags, stripe_len);
109                }
110            }
111            Ok(())
112        },
113    )?;
114
115    Ok(allocs)
116}
117
118/// Walk the chunk tree and return one [`ChunkEntry`] per stripe, including
119/// bytes-used from the extent tree.
120///
121/// The returned list is in chunk-tree order (ascending logical offset); call
122/// sites are responsible for any further sorting.  For each logical chunk the
123/// `used` field is populated by a single extent-tree lookup; if that lookup
124/// fails the field is set to `0` rather than propagating an error.
125///
126/// Requires `CAP_SYS_ADMIN`.
127pub fn chunk_list(fd: BorrowedFd) -> nix::Result<Vec<ChunkEntry>> {
128    let mut entries: Vec<ChunkEntry> = Vec::new();
129
130    tree_search(
131        fd,
132        SearchKey::for_objectid_range(
133            BTRFS_CHUNK_TREE_OBJECTID as u64,
134            BTRFS_CHUNK_ITEM_KEY,
135            BTRFS_FIRST_CHUNK_TREE_OBJECTID as u64,
136            BTRFS_FIRST_CHUNK_TREE_OBJECTID as u64,
137        ),
138        |hdr, data| {
139            if let Some(stripes) = parse_chunk_stripes(data) {
140                let logical_start = hdr.offset;
141                let length = read_le_u64(data, CHUNK_LENGTH_OFF);
142                let type_bits = read_le_u64(data, CHUNK_TYPE_OFF);
143                let flags = BlockGroupFlags::from_bits_truncate(type_bits);
144                let used = block_group_used(fd, logical_start).unwrap_or(0);
145                for (devid, physical_start) in stripes {
146                    entries.push(ChunkEntry {
147                        devid,
148                        physical_start,
149                        logical_start,
150                        length,
151                        flags,
152                        used,
153                    });
154                }
155            }
156            Ok(())
157        },
158    )?;
159
160    Ok(entries)
161}
162
163/// Look up the bytes-used counter for the block group at `logical_start` by
164/// searching for `BTRFS_BLOCK_GROUP_ITEM_KEY` in the extent tree.
165///
166/// Returns `None` if the block group item is not found or cannot be read.
167fn block_group_used(fd: BorrowedFd, logical_start: u64) -> Option<u64> {
168    let mut used: Option<u64> = None;
169    tree_search(
170        fd,
171        SearchKey {
172            tree_id: BTRFS_EXTENT_TREE_OBJECTID as u64,
173            min_objectid: logical_start,
174            max_objectid: logical_start,
175            min_type: BTRFS_BLOCK_GROUP_ITEM_KEY,
176            max_type: BTRFS_BLOCK_GROUP_ITEM_KEY,
177            min_offset: 0,
178            max_offset: u64::MAX,
179            min_transid: 0,
180            max_transid: u64::MAX,
181        },
182        |_hdr, data| {
183            let used_off = std::mem::offset_of!(btrfs_block_group_item, used);
184            if data.len()
185                >= used_off + field_size!(btrfs_block_group_item, used)
186            {
187                used = Some(read_le_u64(data, used_off));
188            }
189            Ok(())
190        },
191    )
192    .ok()?;
193    used
194}
195
196/// Parse a raw chunk item payload.
197///
198/// Returns `(stripe_len, flags, devids)` on success, or `None` if the buffer
199/// is too small to be a valid chunk item.
200fn parse_chunk(
201    data: &[u8],
202) -> Option<(u64, BlockGroupFlags, impl Iterator<Item = u64> + '_)> {
203    if data.len() < CHUNK_MIN_LEN {
204        return None;
205    }
206
207    let stripe_len = read_le_u64(data, CHUNK_STRIPE_LEN_OFF);
208    let type_bits = read_le_u64(data, CHUNK_TYPE_OFF);
209    let num_stripes = read_le_u16(data, CHUNK_NUM_STRIPES_OFF) as usize;
210    let _length = read_le_u64(data, CHUNK_LENGTH_OFF);
211
212    // Sanity-check: the item must be large enough to hold all stripes.
213    let expected_len = CHUNK_FIRST_STRIPE_OFF + num_stripes * STRIPE_SIZE;
214    if data.len() < expected_len || num_stripes == 0 {
215        return None;
216    }
217
218    let flags = BlockGroupFlags::from_bits_truncate(type_bits);
219
220    let devids = (0..num_stripes).map(move |i| {
221        let stripe_off = CHUNK_FIRST_STRIPE_OFF + i * STRIPE_SIZE;
222        read_le_u64(data, stripe_off + STRIPE_DEVID_OFF)
223    });
224
225    Some((stripe_len, flags, devids))
226}
227
228/// Parse a raw chunk item payload and return an iterator of `(devid,
229/// physical_start)` pairs for each stripe.
230///
231/// Returns `None` if the buffer is too small to be a valid chunk item.
232fn parse_chunk_stripes(
233    data: &[u8],
234) -> Option<impl Iterator<Item = (u64, u64)> + '_> {
235    if data.len() < CHUNK_MIN_LEN {
236        return None;
237    }
238
239    let num_stripes = read_le_u16(data, CHUNK_NUM_STRIPES_OFF) as usize;
240    let expected_len = CHUNK_FIRST_STRIPE_OFF + num_stripes * STRIPE_SIZE;
241    if data.len() < expected_len || num_stripes == 0 {
242        return None;
243    }
244
245    let iter = (0..num_stripes).map(move |i| {
246        let stripe_off = CHUNK_FIRST_STRIPE_OFF + i * STRIPE_SIZE;
247        let devid = read_le_u64(data, stripe_off + STRIPE_DEVID_OFF);
248        let physical_start = read_le_u64(data, stripe_off + STRIPE_OFFSET_OFF);
249        (devid, physical_start)
250    });
251
252    Some(iter)
253}
254
255/// Add `stripe_len` bytes to the `(devid, flags)` entry, creating it if
256/// it does not yet exist.
257fn accumulate(
258    allocs: &mut Vec<DeviceAllocation>,
259    devid: u64,
260    flags: BlockGroupFlags,
261    bytes: u64,
262) {
263    if let Some(entry) = allocs
264        .iter_mut()
265        .find(|a| a.devid == devid && a.flags == flags)
266    {
267        entry.bytes += bytes;
268    } else {
269        allocs.push(DeviceAllocation {
270            devid,
271            flags,
272            bytes,
273        });
274    }
275}
276
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a chunk item buffer consisting of the chunk header fields and
    /// one `btrfs_stripe` record per element of `stripes`.
    ///
    /// `num_stripes` is written to the header independently of
    /// `stripes.len()`, so tests can craft items whose declared stripe count
    /// disagrees with the space actually present in the buffer.
    fn build_chunk_buf(
        length: u64,
        stripe_len: u64,
        type_bits: u64,
        num_stripes: u16,
        stripes: &[(u64, u64)], // (devid, offset) per stripe
    ) -> Vec<u8> {
        /// Copy `bytes` into `buf` starting at byte offset `off`.
        fn put(buf: &mut [u8], off: usize, bytes: &[u8]) {
            buf[off..off + bytes.len()].copy_from_slice(bytes);
        }

        let total = CHUNK_FIRST_STRIPE_OFF + stripes.len() * STRIPE_SIZE;
        let mut buf = vec![0u8; total];
        put(&mut buf, CHUNK_LENGTH_OFF, &length.to_le_bytes());
        put(&mut buf, CHUNK_STRIPE_LEN_OFF, &stripe_len.to_le_bytes());
        put(&mut buf, CHUNK_TYPE_OFF, &type_bits.to_le_bytes());
        put(&mut buf, CHUNK_NUM_STRIPES_OFF, &num_stripes.to_le_bytes());
        for (i, &(devid, offset)) in stripes.iter().enumerate() {
            let s = CHUNK_FIRST_STRIPE_OFF + i * STRIPE_SIZE;
            put(&mut buf, s + STRIPE_DEVID_OFF, &devid.to_le_bytes());
            put(&mut buf, s + STRIPE_OFFSET_OFF, &offset.to_le_bytes());
        }
        buf
    }

    // --- parse_chunk ---

    #[test]
    fn parse_chunk_single_stripe() {
        let data_flags = BlockGroupFlags::DATA.bits();
        let buf = build_chunk_buf(1024 * 1024, 65536, data_flags, 1, &[(1, 0)]);
        let (stripe_len, flags, devids) = parse_chunk(&buf).unwrap();
        assert_eq!(stripe_len, 65536);
        assert_eq!(flags, BlockGroupFlags::DATA);
        let devids: Vec<u64> = devids.collect();
        assert_eq!(devids, vec![1]);
    }

    #[test]
    fn parse_chunk_two_stripes() {
        let flags_bits =
            (BlockGroupFlags::DATA | BlockGroupFlags::RAID1).bits();
        let buf = build_chunk_buf(
            1 << 30,
            1 << 30,
            flags_bits,
            2,
            &[(1, 0), (2, 4096)],
        );
        let (_, flags, devids) = parse_chunk(&buf).unwrap();
        assert_eq!(flags, BlockGroupFlags::DATA | BlockGroupFlags::RAID1);
        let devids: Vec<u64> = devids.collect();
        assert_eq!(devids, vec![1, 2]);
    }

    #[test]
    fn parse_chunk_too_short() {
        let buf = vec![0u8; CHUNK_MIN_LEN - 1];
        assert!(parse_chunk(&buf).is_none());
    }

    #[test]
    fn parse_chunk_zero_stripes() {
        // The buffer has room for one stripe, so the minimum-length check
        // passes and only the declared count of zero can cause rejection.
        let buf = build_chunk_buf(1024, 1024, 0, 0, &[(1, 0)]);
        assert_eq!(buf.len(), CHUNK_MIN_LEN);
        assert!(parse_chunk(&buf).is_none());
    }

    #[test]
    fn parse_chunk_claims_more_stripes_than_fit() {
        // num_stripes says 5 but buffer only has room for 1
        let buf = build_chunk_buf(1024, 1024, 0, 5, &[(1, 0)]);
        assert!(parse_chunk(&buf).is_none());
    }

    // --- parse_chunk_stripes ---

    #[test]
    fn parse_chunk_stripes_returns_devid_and_offset() {
        let buf =
            build_chunk_buf(1 << 20, 1 << 20, 0, 2, &[(3, 8192), (7, 16384)]);
        let stripes: Vec<(u64, u64)> =
            parse_chunk_stripes(&buf).unwrap().collect();
        assert_eq!(stripes, vec![(3, 8192), (7, 16384)]);
    }

    #[test]
    fn parse_chunk_stripes_too_short() {
        let buf = vec![0u8; 10];
        assert!(parse_chunk_stripes(&buf).is_none());
    }

    #[test]
    fn parse_chunk_stripes_zero_stripes() {
        // Long enough for one stripe, but the header declares none.
        let buf = build_chunk_buf(1024, 1024, 0, 0, &[(1, 0)]);
        assert!(parse_chunk_stripes(&buf).is_none());
    }

    #[test]
    fn parse_chunk_stripes_claims_more_stripes_than_fit() {
        // The header declares 3 stripes but only 2 are present.
        let buf = build_chunk_buf(1024, 1024, 0, 3, &[(1, 0), (2, 4096)]);
        assert!(parse_chunk_stripes(&buf).is_none());
    }

    // --- accumulate ---

    #[test]
    fn accumulate_new_entry() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        assert_eq!(allocs.len(), 1);
        assert_eq!(allocs[0].devid, 1);
        assert_eq!(allocs[0].flags, BlockGroupFlags::DATA);
        assert_eq!(allocs[0].bytes, 1000);
    }

    #[test]
    fn accumulate_merge_same_devid_flags() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 2000);
        assert_eq!(allocs.len(), 1);
        assert_eq!(allocs[0].bytes, 3000);
    }

    #[test]
    fn accumulate_separate_different_flags() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut allocs, 1, BlockGroupFlags::METADATA, 2000);
        assert_eq!(allocs.len(), 2);
    }

    #[test]
    fn accumulate_separate_different_devids() {
        let mut allocs = Vec::new();
        accumulate(&mut allocs, 1, BlockGroupFlags::DATA, 1000);
        accumulate(&mut allocs, 2, BlockGroupFlags::DATA, 2000);
        assert_eq!(allocs.len(), 2);
    }
}