Skip to main content

btrfs_uapi/
qgroup.rs

1//! # Quota group (qgroup) management: hierarchical accounting of disk usage
2//!
3//! Qgroups track how much disk space a subvolume (or a group of subvolumes)
4//! uses.  Every subvolume automatically gets a level-0 qgroup whose ID matches
5//! the subvolume ID.  Higher-level qgroups can be created and linked into a
6//! parent–child hierarchy so that space usage rolls up through the tree.
7//!
8//! Quota must be enabled on the filesystem (see [`crate::quota`]) before any
9//! qgroup operations will succeed.  Most operations require `CAP_SYS_ADMIN`.
10
11use crate::{
12    field_size,
13    raw::{
14        BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID, BTRFS_QGROUP_INFO_KEY,
15        BTRFS_QGROUP_LIMIT_EXCL_CMPR, BTRFS_QGROUP_LIMIT_KEY, BTRFS_QGROUP_LIMIT_MAX_EXCL,
16        BTRFS_QGROUP_LIMIT_MAX_RFER, BTRFS_QGROUP_LIMIT_RFER_CMPR, BTRFS_QGROUP_RELATION_KEY,
17        BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT, BTRFS_QGROUP_STATUS_FLAG_ON,
18        BTRFS_QGROUP_STATUS_FLAG_RESCAN, BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE,
19        BTRFS_QGROUP_STATUS_KEY, BTRFS_QUOTA_TREE_OBJECTID, BTRFS_ROOT_ITEM_KEY,
20        BTRFS_ROOT_TREE_OBJECTID, btrfs_ioc_qgroup_assign, btrfs_ioc_qgroup_create,
21        btrfs_ioc_qgroup_limit, btrfs_ioctl_qgroup_assign_args, btrfs_ioctl_qgroup_create_args,
22        btrfs_ioctl_qgroup_limit_args, btrfs_qgroup_info_item, btrfs_qgroup_limit,
23        btrfs_qgroup_limit_item, btrfs_qgroup_status_item,
24    },
25    tree_search::{SearchKey, tree_search},
26};
27use bitflags::bitflags;
28use nix::errno::Errno;
29use std::{
30    collections::{HashMap, HashSet},
31    mem::{self, offset_of, size_of},
32    os::{fd::AsRawFd, unix::io::BorrowedFd},
33};
34
35// ---------------------------------------------------------------------------
36// Qgroup ID encoding
37// ---------------------------------------------------------------------------
38
/// Return the hierarchy level held in the top 16 bits of a packed qgroup ID.
///
/// A packed ID is laid out as `(level << 48) | subvolid`.  Level 0 is the
/// level used by the qgroups that map one-to-one onto subvolumes.
#[inline]
pub fn qgroupid_level(qgroupid: u64) -> u16 {
    // Number of low bits occupied by the subvolume ID component.
    const LEVEL_SHIFT: u32 = 48;

    let level = qgroupid >> LEVEL_SHIFT;
    // After the shift only 16 significant bits remain, so this is lossless.
    level as u16
}
47
/// Return the low 48 bits of a packed qgroup ID, i.e. the subvolume ID part.
///
/// The value is only a real subvolume ID when the qgroup is at level 0.
#[inline]
pub fn qgroupid_subvolid(qgroupid: u64) -> u64 {
    // Width of the subvolume ID component within the packed ID.
    const SUBVOLID_BITS: u32 = 48;
    const SUBVOLID_MASK: u64 = (1u64 << SUBVOLID_BITS) - 1;

    qgroupid & SUBVOLID_MASK
}
55
56// ---------------------------------------------------------------------------
57// Public flag types
58// ---------------------------------------------------------------------------
59
60bitflags! {
61    /// Status flags for the quota tree as a whole (`BTRFS_QGROUP_STATUS_KEY`).
62    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
63    pub struct QgroupStatusFlags: u64 {
64        /// Quota accounting is enabled.
65        const ON           = BTRFS_QGROUP_STATUS_FLAG_ON as u64;
66        /// A rescan is currently in progress.
67        const RESCAN       = BTRFS_QGROUP_STATUS_FLAG_RESCAN as u64;
68        /// Accounting is inconsistent and a rescan is needed.
69        const INCONSISTENT = BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT as u64;
70        /// Simple quota mode (squota) is active.
71        const SIMPLE_MODE  = BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE as u64;
72    }
73}
74
75bitflags! {
76    /// Which limit fields are actively enforced on a qgroup.
77    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
78    pub struct QgroupLimitFlags: u64 {
79        /// `max_rfer` (maximum referenced bytes) is enforced.
80        const MAX_RFER  = BTRFS_QGROUP_LIMIT_MAX_RFER as u64;
81        /// `max_excl` (maximum exclusive bytes) is enforced.
82        const MAX_EXCL  = BTRFS_QGROUP_LIMIT_MAX_EXCL as u64;
83        /// Referenced bytes are compressed before comparison.
84        const RFER_CMPR = BTRFS_QGROUP_LIMIT_RFER_CMPR as u64;
85        /// Exclusive bytes are compressed before comparison.
86        const EXCL_CMPR = BTRFS_QGROUP_LIMIT_EXCL_CMPR as u64;
87    }
88}
89
90// ---------------------------------------------------------------------------
91// Public data types
92// ---------------------------------------------------------------------------
93
94/// Usage and limit information for a single qgroup.
95#[derive(Debug, Clone)]
96pub struct QgroupInfo {
97    /// Packed qgroup ID: `(level << 48) | subvolid`.
98    pub qgroupid: u64,
99    /// Total referenced bytes (includes shared data).
100    pub rfer: u64,
101    /// Referenced bytes after compression.
102    pub rfer_cmpr: u64,
103    /// Exclusively-owned bytes (not shared with any other subvolume).
104    pub excl: u64,
105    /// Exclusively-owned bytes after compression.
106    pub excl_cmpr: u64,
107    /// Limit flags — which of the limit fields below are enforced.
108    pub limit_flags: QgroupLimitFlags,
109    /// Maximum referenced bytes.  `u64::MAX` when no limit is set.
110    pub max_rfer: u64,
111    /// Maximum exclusive bytes.  `u64::MAX` when no limit is set.
112    pub max_excl: u64,
113    /// IDs of qgroups that are parents of this one in the hierarchy.
114    pub parents: Vec<u64>,
115    /// IDs of qgroups that are children of this one in the hierarchy.
116    pub children: Vec<u64>,
117    /// Level-0 only: `true` when the corresponding subvolume no longer
118    /// exists (this is a "stale" qgroup left behind after deletion).
119    pub stale: bool,
120}
121
122/// Result of [`qgroup_list`]: overall quota status and per-qgroup details.
123#[derive(Debug, Clone)]
124pub struct QgroupList {
125    /// Flags from the `BTRFS_QGROUP_STATUS_KEY` item.
126    pub status_flags: QgroupStatusFlags,
127    /// All qgroups found in the quota tree, sorted by `qgroupid`.
128    pub qgroups: Vec<QgroupInfo>,
129}
130
131// ---------------------------------------------------------------------------
132// Internal builder used while scanning the quota tree
133// ---------------------------------------------------------------------------
134
135#[derive(Default)]
136struct QgroupEntryBuilder {
137    // From INFO item
138    has_info: bool,
139    rfer: u64,
140    rfer_cmpr: u64,
141    excl: u64,
142    excl_cmpr: u64,
143    // From LIMIT item
144    has_limit: bool,
145    limit_flags: u64,
146    max_rfer: u64,
147    max_excl: u64,
148    // From RELATION items
149    parents: Vec<u64>,
150    children: Vec<u64>,
151}
152
153impl QgroupEntryBuilder {
154    fn build(self, qgroupid: u64, stale: bool) -> QgroupInfo {
155        QgroupInfo {
156            qgroupid,
157            rfer: self.rfer,
158            rfer_cmpr: self.rfer_cmpr,
159            excl: self.excl,
160            excl_cmpr: self.excl_cmpr,
161            limit_flags: QgroupLimitFlags::from_bits_truncate(self.limit_flags),
162            max_rfer: if self.limit_flags & BTRFS_QGROUP_LIMIT_MAX_RFER as u64 != 0 {
163                self.max_rfer
164            } else {
165                u64::MAX
166            },
167            max_excl: if self.limit_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL as u64 != 0 {
168                self.max_excl
169            } else {
170                u64::MAX
171            },
172            parents: self.parents,
173            children: self.children,
174            stale,
175        }
176    }
177}
178
/// Read a little-endian `u64` from `buf`, starting at byte offset `off`.
///
/// Panics if `buf` does not hold 8 bytes at `off`; callers are expected to
/// check the length beforehand.
#[inline]
fn rle64(buf: &[u8], off: usize) -> u64 {
    let mut word = [0u8; 8];
    word.copy_from_slice(&buf[off..off + 8]);
    u64::from_le_bytes(word)
}
183
184fn parse_status_flags(data: &[u8]) -> Option<u64> {
185    let off = offset_of!(btrfs_qgroup_status_item, flags);
186    if data.len() < off + field_size!(btrfs_qgroup_status_item, flags) {
187        return None;
188    }
189    Some(rle64(data, off))
190}
191
192fn parse_info(builder: &mut QgroupEntryBuilder, data: &[u8]) {
193    if data.len() < size_of::<btrfs_qgroup_info_item>() {
194        return;
195    }
196
197    builder.has_info = true;
198    builder.rfer = rle64(data, offset_of!(btrfs_qgroup_info_item, rfer));
199    builder.rfer_cmpr = rle64(data, offset_of!(btrfs_qgroup_info_item, rfer_cmpr));
200    builder.excl = rle64(data, offset_of!(btrfs_qgroup_info_item, excl));
201    builder.excl_cmpr = rle64(data, offset_of!(btrfs_qgroup_info_item, excl_cmpr));
202}
203
204fn parse_limit(builder: &mut QgroupEntryBuilder, data: &[u8]) {
205    let end = offset_of!(btrfs_qgroup_limit_item, max_excl)
206        + field_size!(btrfs_qgroup_limit_item, max_excl);
207    if data.len() < end {
208        return;
209    }
210
211    builder.has_limit = true;
212    builder.limit_flags = rle64(data, offset_of!(btrfs_qgroup_limit_item, flags));
213    builder.max_rfer = rle64(data, offset_of!(btrfs_qgroup_limit_item, max_rfer));
214    builder.max_excl = rle64(data, offset_of!(btrfs_qgroup_limit_item, max_excl));
215}
216
217// ---------------------------------------------------------------------------
218// Public API
219// ---------------------------------------------------------------------------
220
221/// Create a new qgroup with the given `qgroupid` on the filesystem referred
222/// to by `fd`.
223///
224/// `qgroupid` is the packed form: `(level << 48) | subvolid`.
225pub fn qgroup_create(fd: BorrowedFd, qgroupid: u64) -> nix::Result<()> {
226    let mut args: btrfs_ioctl_qgroup_create_args = unsafe { mem::zeroed() };
227    args.create = 1;
228    args.qgroupid = qgroupid;
229    // SAFETY: args is fully initialised above and lives for the duration of
230    // the ioctl call.
231    unsafe { btrfs_ioc_qgroup_create(fd.as_raw_fd(), &args) }?;
232    Ok(())
233}
234
235/// Destroy the qgroup with the given `qgroupid` on the filesystem referred
236/// to by `fd`.
237pub fn qgroup_destroy(fd: BorrowedFd, qgroupid: u64) -> nix::Result<()> {
238    let mut args: btrfs_ioctl_qgroup_create_args = unsafe { mem::zeroed() };
239    args.create = 0;
240    args.qgroupid = qgroupid;
241    // SAFETY: args is fully initialised above and lives for the duration of
242    // the ioctl call.
243    unsafe { btrfs_ioc_qgroup_create(fd.as_raw_fd(), &args) }?;
244    Ok(())
245}
246
247/// Assign qgroup `src` as a member of qgroup `dst` (i.e. `src` becomes a
248/// child of `dst`).
249///
250/// Returns `true` if the kernel indicates that a quota rescan is now needed
251/// (the ioctl returned a positive value).
252pub fn qgroup_assign(fd: BorrowedFd, src: u64, dst: u64) -> nix::Result<bool> {
253    let mut args: btrfs_ioctl_qgroup_assign_args = unsafe { mem::zeroed() };
254    args.assign = 1;
255    args.src = src;
256    args.dst = dst;
257    // SAFETY: args is fully initialised above and lives for the duration of
258    // the ioctl call.
259    let ret = unsafe { btrfs_ioc_qgroup_assign(fd.as_raw_fd(), &args) }?;
260    Ok(ret > 0)
261}
262
263/// Remove the child–parent relationship between qgroups `src` and `dst`.
264///
265/// Returns `true` if the kernel indicates that a quota rescan is now needed.
266pub fn qgroup_remove(fd: BorrowedFd, src: u64, dst: u64) -> nix::Result<bool> {
267    let mut args: btrfs_ioctl_qgroup_assign_args = unsafe { mem::zeroed() };
268    args.assign = 0;
269    args.src = src;
270    args.dst = dst;
271    // SAFETY: args is fully initialised above and lives for the duration of
272    // the ioctl call.
273    let ret = unsafe { btrfs_ioc_qgroup_assign(fd.as_raw_fd(), &args) }?;
274    Ok(ret > 0)
275}
276
277/// Set usage limits on a qgroup.
278///
279/// Pass `QgroupLimitFlags::MAX_RFER` in `flags` to enforce `max_rfer`, and/or
280/// `QgroupLimitFlags::MAX_EXCL` to enforce `max_excl`.  Clear a limit by
281/// omitting the corresponding flag.
282pub fn qgroup_limit(
283    fd: BorrowedFd,
284    qgroupid: u64,
285    flags: QgroupLimitFlags,
286    max_rfer: u64,
287    max_excl: u64,
288) -> nix::Result<()> {
289    let lim = btrfs_qgroup_limit {
290        flags: flags.bits(),
291        max_referenced: max_rfer,
292        max_exclusive: max_excl,
293        rsv_referenced: 0,
294        rsv_exclusive: 0,
295    };
296    let mut args: btrfs_ioctl_qgroup_limit_args = unsafe { mem::zeroed() };
297    args.qgroupid = qgroupid;
298    args.lim = lim;
299    // SAFETY: args is fully initialised above and lives for the duration of
300    // the ioctl call.  The ioctl number is #43 (_IOR direction in the kernel
301    // header), which reads args from userspace.
302    unsafe { btrfs_ioc_qgroup_limit(fd.as_raw_fd(), &mut args) }?;
303    Ok(())
304}
305
306/// List all qgroups and overall quota status for the filesystem referred to
307/// by `fd`.
308///
309/// Returns `Ok(QgroupList { status_flags: empty, qgroups: [] })` when quota
310/// accounting is not enabled (`ENOENT` from the kernel).
311pub fn qgroup_list(fd: BorrowedFd) -> nix::Result<QgroupList> {
312    // Build a map of qgroupid → builder as we walk the quota tree.
313    let mut builders: HashMap<u64, QgroupEntryBuilder> = HashMap::new();
314    let mut status_flags = QgroupStatusFlags::empty();
315
316    // Scan the quota tree for STATUS / INFO / LIMIT / RELATION items in one pass.
317    let quota_key = SearchKey {
318        tree_id: BTRFS_QUOTA_TREE_OBJECTID as u64,
319        min_objectid: 0,
320        max_objectid: u64::MAX,
321        min_type: BTRFS_QGROUP_STATUS_KEY as u32,
322        max_type: BTRFS_QGROUP_RELATION_KEY as u32,
323        min_offset: 0,
324        max_offset: u64::MAX,
325        min_transid: 0,
326        max_transid: u64::MAX,
327    };
328
329    let scan_result = tree_search(fd, quota_key, |hdr, data| {
330        match hdr.item_type as u32 {
331            t if t == BTRFS_QGROUP_STATUS_KEY as u32 => {
332                if let Some(raw) = parse_status_flags(data) {
333                    status_flags = QgroupStatusFlags::from_bits_truncate(raw);
334                }
335            }
336            t if t == BTRFS_QGROUP_INFO_KEY as u32 => {
337                // offset = qgroupid
338                let entry = builders.entry(hdr.offset).or_default();
339                parse_info(entry, data);
340            }
341            t if t == BTRFS_QGROUP_LIMIT_KEY as u32 => {
342                // offset = qgroupid
343                let entry = builders.entry(hdr.offset).or_default();
344                parse_limit(entry, data);
345            }
346            t if t == BTRFS_QGROUP_RELATION_KEY as u32 => {
347                // The kernel stores two entries per relation:
348                //   (child, RELATION_KEY, parent)
349                //   (parent, RELATION_KEY, child)
350                // Only process the canonical form where objectid > offset,
351                // i.e. parent > child.
352                if hdr.objectid > hdr.offset {
353                    let parent = hdr.objectid;
354                    let child = hdr.offset;
355                    builders.entry(child).or_default().parents.push(parent);
356                    builders.entry(parent).or_default().children.push(child);
357                }
358            }
359            _ => {}
360        }
361        Ok(())
362    });
363
364    match scan_result {
365        Err(Errno::ENOENT) => {
366            // Quota tree does not exist — quotas are disabled.
367            return Ok(QgroupList {
368                status_flags: QgroupStatusFlags::empty(),
369                qgroups: Vec::new(),
370            });
371        }
372        Err(e) => return Err(e),
373        Ok(()) => {}
374    }
375
376    // Collect existing subvolume IDs so we can mark stale level-0 qgroups.
377    let existing_subvol_ids = collect_subvol_ids(fd)?;
378
379    // Convert builders to QgroupInfo, computing stale flag for level-0 groups.
380    let mut qgroups: Vec<QgroupInfo> = builders
381        .into_iter()
382        .map(|(qgroupid, builder)| {
383            let stale = if qgroupid_level(qgroupid) == 0 {
384                !existing_subvol_ids.contains(&qgroupid_subvolid(qgroupid))
385            } else {
386                false
387            };
388            builder.build(qgroupid, stale)
389        })
390        .collect();
391
392    qgroups.sort_by_key(|q| q.qgroupid);
393
394    Ok(QgroupList {
395        status_flags,
396        qgroups,
397    })
398}
399
400/// Collect the set of all existing subvolume IDs by scanning
401/// `ROOT_ITEM_KEY` entries in the root tree.
402fn collect_subvol_ids(fd: BorrowedFd) -> nix::Result<HashSet<u64>> {
403    let mut ids: HashSet<u64> = HashSet::new();
404
405    // BTRFS_LAST_FREE_OBJECTID binds as i32 = -256; cast to u64 gives
406    // 0xFFFFFFFF_FFFFFF00 as expected.
407    let key = SearchKey::for_objectid_range(
408        BTRFS_ROOT_TREE_OBJECTID as u64,
409        BTRFS_ROOT_ITEM_KEY as u32,
410        BTRFS_FIRST_FREE_OBJECTID as u64,
411        BTRFS_LAST_FREE_OBJECTID as u64,
412    );
413
414    tree_search(fd, key, |hdr, _data| {
415        ids.insert(hdr.objectid);
416        Ok(())
417    })?;
418
419    Ok(ids)
420}
421
422/// Destroy all "stale" level-0 qgroups — those whose corresponding subvolume
423/// no longer exists.
424///
425/// In simple-quota mode (`SIMPLE_MODE` flag set), stale qgroups with non-zero
426/// `rfer` or `excl` are retained because they hold accounting information for
427/// dropped subvolumes.
428///
429/// Returns the number of qgroups successfully destroyed.
430pub fn qgroup_clear_stale(fd: BorrowedFd) -> nix::Result<usize> {
431    let list = qgroup_list(fd)?;
432    let simple_mode = list.status_flags.contains(QgroupStatusFlags::SIMPLE_MODE);
433
434    let mut count = 0usize;
435
436    for qg in &list.qgroups {
437        // Only process level-0 stale qgroups.
438        if qgroupid_level(qg.qgroupid) != 0 || !qg.stale {
439            continue;
440        }
441
442        // In simple-quota mode, keep stale qgroups that still have usage data.
443        if simple_mode && (qg.rfer != 0 || qg.excl != 0) {
444            continue;
445        }
446
447        if qgroup_destroy(fd, qg.qgroupid).is_ok() {
448            count += 1;
449        }
450    }
451
452    Ok(count)
453}
454
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a packed qgroup ID from its level and subvolume ID parts.
    fn pack(level: u64, subvolid: u64) -> u64 {
        (level << 48) | subvolid
    }

    #[test]
    fn qgroupid_level_zero() {
        // Plain subvolume IDs have no level bits set.
        for id in [5u64, 256] {
            assert_eq!(qgroupid_level(id), 0);
        }
    }

    #[test]
    fn qgroupid_level_nonzero() {
        assert_eq!(qgroupid_level(pack(1, 100)), 1);
        assert_eq!(qgroupid_level(pack(3, 42)), 3);
    }

    #[test]
    fn qgroupid_subvolid_extracts_lower_48_bits() {
        assert_eq!(qgroupid_subvolid(256), 256);
        assert_eq!(qgroupid_subvolid(pack(1, 100)), 100);
        assert_eq!(qgroupid_subvolid(pack(2, 0)), 0);
    }

    #[test]
    fn qgroupid_roundtrip() {
        let (level, subvolid) = (2u64, 999u64);
        let packed = pack(level, subvolid);

        assert_eq!(qgroupid_level(packed), level as u16);
        assert_eq!(qgroupid_subvolid(packed), subvolid);
    }
}