Skip to main content

btrfs_uapi/
quota.rs

//! # Quota and qgroup management: enabling quotas and tracking disk usage
//!
//! Quota accounting tracks disk usage per subvolume via qgroups (quota groups).
//! It must be explicitly enabled before any qgroup limits or usage data are
//! available.  Once enabled, usage numbers are maintained incrementally by the
//! kernel; a rescan rebuilds them from scratch if they become inconsistent.
//!
//! Every subvolume automatically gets a level-0 qgroup whose ID matches the
//! subvolume ID.  Higher-level qgroups can be created and linked into a
//! parent-child hierarchy so that space usage rolls up through the tree.
//!
//! Quota status (whether quotas are on, which mode, inconsistency flag) is
//! read from sysfs via [`crate::sysfs::SysfsBtrfs::quota_status`].
//!
//! Most operations require `CAP_SYS_ADMIN`.
16
17use crate::{
18    field_size,
19    raw::{
20        BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID,
21        BTRFS_QGROUP_INFO_KEY, BTRFS_QGROUP_LIMIT_EXCL_CMPR,
22        BTRFS_QGROUP_LIMIT_KEY, BTRFS_QGROUP_LIMIT_MAX_EXCL,
23        BTRFS_QGROUP_LIMIT_MAX_RFER, BTRFS_QGROUP_LIMIT_RFER_CMPR,
24        BTRFS_QGROUP_RELATION_KEY, BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT,
25        BTRFS_QGROUP_STATUS_FLAG_ON, BTRFS_QGROUP_STATUS_FLAG_RESCAN,
26        BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE, BTRFS_QGROUP_STATUS_KEY,
27        BTRFS_QUOTA_CTL_DISABLE, BTRFS_QUOTA_CTL_ENABLE,
28        BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA, BTRFS_QUOTA_TREE_OBJECTID,
29        BTRFS_ROOT_ITEM_KEY, BTRFS_ROOT_TREE_OBJECTID, btrfs_ioc_qgroup_assign,
30        btrfs_ioc_qgroup_create, btrfs_ioc_qgroup_limit, btrfs_ioc_quota_ctl,
31        btrfs_ioc_quota_rescan, btrfs_ioc_quota_rescan_status,
32        btrfs_ioc_quota_rescan_wait, btrfs_ioctl_qgroup_assign_args,
33        btrfs_ioctl_qgroup_create_args, btrfs_ioctl_qgroup_limit_args,
34        btrfs_ioctl_quota_ctl_args, btrfs_ioctl_quota_rescan_args,
35        btrfs_qgroup_info_item, btrfs_qgroup_limit, btrfs_qgroup_limit_item,
36        btrfs_qgroup_status_item,
37    },
38    tree_search::{SearchKey, tree_search},
39};
40use bitflags::bitflags;
41use nix::errno::Errno;
42use std::{
43    collections::{HashMap, HashSet},
44    mem::{self, offset_of, size_of},
45    os::{fd::AsRawFd, unix::io::BorrowedFd},
46};
47
48/// Enable quota accounting on the filesystem referred to by `fd`.
49///
50/// When `simple` is `true`, uses `BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA`, which
51/// accounts for extent ownership by lifetime rather than backref walks. This is
52/// faster but less precise than full qgroup accounting.
53pub fn quota_enable(fd: BorrowedFd, simple: bool) -> nix::Result<()> {
54    let cmd = if simple {
55        BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA as u64
56    } else {
57        BTRFS_QUOTA_CTL_ENABLE as u64
58    };
59    let mut args: btrfs_ioctl_quota_ctl_args = unsafe { mem::zeroed() };
60    args.cmd = cmd;
61    unsafe { btrfs_ioc_quota_ctl(fd.as_raw_fd(), &mut args) }?;
62    Ok(())
63}
64
65/// Disable quota accounting on the filesystem referred to by `fd`.
66pub fn quota_disable(fd: BorrowedFd) -> nix::Result<()> {
67    let mut args: btrfs_ioctl_quota_ctl_args = unsafe { mem::zeroed() };
68    args.cmd = BTRFS_QUOTA_CTL_DISABLE as u64;
69    unsafe { btrfs_ioc_quota_ctl(fd.as_raw_fd(), &mut args) }?;
70    Ok(())
71}
72
73/// Start a quota rescan on the filesystem referred to by `fd`.
74///
75/// Returns immediately after kicking off the background scan. Use
76/// [`quota_rescan_wait`] to block until it finishes. If a rescan is already
77/// in progress the kernel returns `EINPROGRESS`; callers that are about to
78/// wait anyway can treat that as a non-error.
79pub fn quota_rescan(fd: BorrowedFd) -> nix::Result<()> {
80    let args: btrfs_ioctl_quota_rescan_args = unsafe { mem::zeroed() };
81    unsafe { btrfs_ioc_quota_rescan(fd.as_raw_fd(), &args) }?;
82    Ok(())
83}
84
85/// Block until the quota rescan currently running on the filesystem referred
86/// to by `fd` completes. Returns immediately if no rescan is in progress.
87pub fn quota_rescan_wait(fd: BorrowedFd) -> nix::Result<()> {
88    unsafe { btrfs_ioc_quota_rescan_wait(fd.as_raw_fd()) }?;
89    Ok(())
90}
91
/// Snapshot of the quota rescan state, as reported by
/// [`quota_rescan_status`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct QuotaRescanStatus {
    /// `true` while a rescan is running.
    pub running: bool,
    /// Object ID of the most recently scanned tree item. Only meaningful
    /// when `running` is `true`.
    pub progress: u64,
}
101
102/// Query the status of the quota rescan on the filesystem referred to by `fd`.
103pub fn quota_rescan_status(fd: BorrowedFd) -> nix::Result<QuotaRescanStatus> {
104    let mut args: btrfs_ioctl_quota_rescan_args = unsafe { mem::zeroed() };
105    unsafe { btrfs_ioc_quota_rescan_status(fd.as_raw_fd(), &mut args) }?;
106    Ok(QuotaRescanStatus {
107        running: args.flags != 0,
108        progress: args.progress,
109    })
110}
111
/// Return the hierarchy level encoded in a packed qgroup ID.
///
/// A qgroup ID is packed as `(level << 48) | subvolid`, so the level lives in
/// the top 16 bits.  Level-0 qgroups correspond directly to subvolumes.
#[inline]
pub fn qgroupid_level(qgroupid: u64) -> u16 {
    /// Bit position at which the level field starts.
    const LEVEL_SHIFT: u32 = 48;
    // After the shift only 16 bits remain, so the cast cannot lose data.
    (qgroupid >> LEVEL_SHIFT) as u16
}
120
/// Return the subvolume ID component (low 48 bits) of a packed qgroup ID.
///
/// Only meaningful for level-0 qgroups.
#[inline]
pub fn qgroupid_subvolid(qgroupid: u64) -> u64 {
    /// Mask selecting the 48 bits below the level field.
    const SUBVOLID_MASK: u64 = (1 << 48) - 1;
    qgroupid & SUBVOLID_MASK
}
128
129bitflags! {
130    /// Status flags for the quota tree as a whole (`BTRFS_QGROUP_STATUS_KEY`).
131    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
132    pub struct QgroupStatusFlags: u64 {
133        /// Quota accounting is enabled.
134        const ON           = BTRFS_QGROUP_STATUS_FLAG_ON as u64;
135        /// A rescan is currently in progress.
136        const RESCAN       = BTRFS_QGROUP_STATUS_FLAG_RESCAN as u64;
137        /// Accounting is inconsistent and a rescan is needed.
138        const INCONSISTENT = BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT as u64;
139        /// Simple quota mode (squota) is active.
140        const SIMPLE_MODE  = BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE as u64;
141    }
142}
143
144bitflags! {
145    /// Which limit fields are actively enforced on a qgroup.
146    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
147    pub struct QgroupLimitFlags: u64 {
148        /// `max_rfer` (maximum referenced bytes) is enforced.
149        const MAX_RFER  = BTRFS_QGROUP_LIMIT_MAX_RFER as u64;
150        /// `max_excl` (maximum exclusive bytes) is enforced.
151        const MAX_EXCL  = BTRFS_QGROUP_LIMIT_MAX_EXCL as u64;
152        /// Referenced bytes are compressed before comparison.
153        const RFER_CMPR = BTRFS_QGROUP_LIMIT_RFER_CMPR as u64;
154        /// Exclusive bytes are compressed before comparison.
155        const EXCL_CMPR = BTRFS_QGROUP_LIMIT_EXCL_CMPR as u64;
156    }
157}
158
159/// Usage and limit information for a single qgroup.
160#[derive(Debug, Clone)]
161pub struct QgroupInfo {
162    /// Packed qgroup ID: `(level << 48) | subvolid`.
163    pub qgroupid: u64,
164    /// Total referenced bytes (includes shared data).
165    pub rfer: u64,
166    /// Referenced bytes after compression.
167    pub rfer_cmpr: u64,
168    /// Exclusively-owned bytes (not shared with any other subvolume).
169    pub excl: u64,
170    /// Exclusively-owned bytes after compression.
171    pub excl_cmpr: u64,
172    /// Limit flags — which of the limit fields below are enforced.
173    pub limit_flags: QgroupLimitFlags,
174    /// Maximum referenced bytes.  `u64::MAX` when no limit is set.
175    pub max_rfer: u64,
176    /// Maximum exclusive bytes.  `u64::MAX` when no limit is set.
177    pub max_excl: u64,
178    /// IDs of qgroups that are parents of this one in the hierarchy.
179    pub parents: Vec<u64>,
180    /// IDs of qgroups that are children of this one in the hierarchy.
181    pub children: Vec<u64>,
182    /// Level-0 only: `true` when the corresponding subvolume no longer
183    /// exists (this is a "stale" qgroup left behind after deletion).
184    pub stale: bool,
185}
186
187/// Result of [`qgroup_list`]: overall quota status and per-qgroup details.
188#[derive(Debug, Clone)]
189pub struct QgroupList {
190    /// Flags from the `BTRFS_QGROUP_STATUS_KEY` item.
191    pub status_flags: QgroupStatusFlags,
192    /// All qgroups found in the quota tree, sorted by `qgroupid`.
193    pub qgroups: Vec<QgroupInfo>,
194}
195
/// Accumulates the pieces of one qgroup while the quota tree is walked; the
/// INFO, LIMIT and RELATION items for a qgroup arrive as separate tree items.
#[derive(Default)]
struct QgroupEntryBuilder {
    /// Set once an INFO item of sufficient size has been parsed.
    has_info: bool,
    /// INFO item: referenced bytes.
    rfer: u64,
    /// INFO item: referenced bytes after compression.
    rfer_cmpr: u64,
    /// INFO item: exclusive bytes.
    excl: u64,
    /// INFO item: exclusive bytes after compression.
    excl_cmpr: u64,

    /// Set once a LIMIT item of sufficient size has been parsed.
    has_limit: bool,
    /// LIMIT item: raw limit flag bits.
    limit_flags: u64,
    /// LIMIT item: raw maximum referenced bytes.
    max_rfer: u64,
    /// LIMIT item: raw maximum exclusive bytes.
    max_excl: u64,

    /// RELATION items: qgroups this one is a child of.
    parents: Vec<u64>,
    /// RELATION items: qgroups that are children of this one.
    children: Vec<u64>,
}
213
214impl QgroupEntryBuilder {
215    fn build(self, qgroupid: u64, stale: bool) -> QgroupInfo {
216        QgroupInfo {
217            qgroupid,
218            rfer: self.rfer,
219            rfer_cmpr: self.rfer_cmpr,
220            excl: self.excl,
221            excl_cmpr: self.excl_cmpr,
222            limit_flags: QgroupLimitFlags::from_bits_truncate(self.limit_flags),
223            max_rfer: if self.limit_flags & BTRFS_QGROUP_LIMIT_MAX_RFER as u64
224                != 0
225            {
226                self.max_rfer
227            } else {
228                u64::MAX
229            },
230            max_excl: if self.limit_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL as u64
231                != 0
232            {
233                self.max_excl
234            } else {
235                u64::MAX
236            },
237            parents: self.parents,
238            children: self.children,
239            stale,
240        }
241    }
242}
243
/// Read a little-endian `u64` from `buf` starting at byte offset `off`.
///
/// # Panics
///
/// Panics if `buf` does not contain 8 bytes starting at `off`; callers are
/// expected to have checked the length beforehand.
#[inline]
fn rle64(buf: &[u8], off: usize) -> u64 {
    let mut word = [0u8; 8];
    word.copy_from_slice(&buf[off..off + 8]);
    u64::from_le_bytes(word)
}
248
249fn parse_status_flags(data: &[u8]) -> Option<u64> {
250    let off = offset_of!(btrfs_qgroup_status_item, flags);
251    if data.len() < off + field_size!(btrfs_qgroup_status_item, flags) {
252        return None;
253    }
254    Some(rle64(data, off))
255}
256
257fn parse_info(builder: &mut QgroupEntryBuilder, data: &[u8]) {
258    if data.len() < size_of::<btrfs_qgroup_info_item>() {
259        return;
260    }
261
262    builder.has_info = true;
263    builder.rfer = rle64(data, offset_of!(btrfs_qgroup_info_item, rfer));
264    builder.rfer_cmpr =
265        rle64(data, offset_of!(btrfs_qgroup_info_item, rfer_cmpr));
266    builder.excl = rle64(data, offset_of!(btrfs_qgroup_info_item, excl));
267    builder.excl_cmpr =
268        rle64(data, offset_of!(btrfs_qgroup_info_item, excl_cmpr));
269}
270
271fn parse_limit(builder: &mut QgroupEntryBuilder, data: &[u8]) {
272    let end = offset_of!(btrfs_qgroup_limit_item, max_excl)
273        + field_size!(btrfs_qgroup_limit_item, max_excl);
274    if data.len() < end {
275        return;
276    }
277
278    builder.has_limit = true;
279    builder.limit_flags =
280        rle64(data, offset_of!(btrfs_qgroup_limit_item, flags));
281    builder.max_rfer =
282        rle64(data, offset_of!(btrfs_qgroup_limit_item, max_rfer));
283    builder.max_excl =
284        rle64(data, offset_of!(btrfs_qgroup_limit_item, max_excl));
285}
286
287/// Create a new qgroup with the given `qgroupid` on the filesystem referred
288/// to by `fd`.
289///
290/// `qgroupid` is the packed form: `(level << 48) | subvolid`.
291pub fn qgroup_create(fd: BorrowedFd, qgroupid: u64) -> nix::Result<()> {
292    let mut args: btrfs_ioctl_qgroup_create_args = unsafe { mem::zeroed() };
293    args.create = 1;
294    args.qgroupid = qgroupid;
295    // SAFETY: args is fully initialised above and lives for the duration of
296    // the ioctl call.
297    unsafe { btrfs_ioc_qgroup_create(fd.as_raw_fd(), &args) }?;
298    Ok(())
299}
300
301/// Destroy the qgroup with the given `qgroupid` on the filesystem referred
302/// to by `fd`.
303pub fn qgroup_destroy(fd: BorrowedFd, qgroupid: u64) -> nix::Result<()> {
304    let mut args: btrfs_ioctl_qgroup_create_args = unsafe { mem::zeroed() };
305    args.create = 0;
306    args.qgroupid = qgroupid;
307    // SAFETY: args is fully initialised above and lives for the duration of
308    // the ioctl call.
309    unsafe { btrfs_ioc_qgroup_create(fd.as_raw_fd(), &args) }?;
310    Ok(())
311}
312
313/// Assign qgroup `src` as a member of qgroup `dst` (i.e. `src` becomes a
314/// child of `dst`).
315///
316/// Returns `true` if the kernel indicates that a quota rescan is now needed
317/// (the ioctl returned a positive value).
318pub fn qgroup_assign(fd: BorrowedFd, src: u64, dst: u64) -> nix::Result<bool> {
319    let mut args: btrfs_ioctl_qgroup_assign_args = unsafe { mem::zeroed() };
320    args.assign = 1;
321    args.src = src;
322    args.dst = dst;
323    // SAFETY: args is fully initialised above and lives for the duration of
324    // the ioctl call.
325    let ret = unsafe { btrfs_ioc_qgroup_assign(fd.as_raw_fd(), &args) }?;
326    Ok(ret > 0)
327}
328
329/// Remove the child–parent relationship between qgroups `src` and `dst`.
330///
331/// Returns `true` if the kernel indicates that a quota rescan is now needed.
332pub fn qgroup_remove(fd: BorrowedFd, src: u64, dst: u64) -> nix::Result<bool> {
333    let mut args: btrfs_ioctl_qgroup_assign_args = unsafe { mem::zeroed() };
334    args.assign = 0;
335    args.src = src;
336    args.dst = dst;
337    // SAFETY: args is fully initialised above and lives for the duration of
338    // the ioctl call.
339    let ret = unsafe { btrfs_ioc_qgroup_assign(fd.as_raw_fd(), &args) }?;
340    Ok(ret > 0)
341}
342
343/// Set usage limits on a qgroup.
344///
345/// Pass `QgroupLimitFlags::MAX_RFER` in `flags` to enforce `max_rfer`, and/or
346/// `QgroupLimitFlags::MAX_EXCL` to enforce `max_excl`.  Clear a limit by
347/// omitting the corresponding flag.
348pub fn qgroup_limit(
349    fd: BorrowedFd,
350    qgroupid: u64,
351    flags: QgroupLimitFlags,
352    max_rfer: u64,
353    max_excl: u64,
354) -> nix::Result<()> {
355    let lim = btrfs_qgroup_limit {
356        flags: flags.bits(),
357        max_referenced: max_rfer,
358        max_exclusive: max_excl,
359        rsv_referenced: 0,
360        rsv_exclusive: 0,
361    };
362    let mut args: btrfs_ioctl_qgroup_limit_args = unsafe { mem::zeroed() };
363    args.qgroupid = qgroupid;
364    args.lim = lim;
365    // SAFETY: args is fully initialised above and lives for the duration of
366    // the ioctl call.  The ioctl number is #43 (_IOR direction in the kernel
367    // header), which reads args from userspace.
368    unsafe { btrfs_ioc_qgroup_limit(fd.as_raw_fd(), &mut args) }?;
369    Ok(())
370}
371
372/// List all qgroups and overall quota status for the filesystem referred to
373/// by `fd`.
374///
375/// Returns `Ok(QgroupList { status_flags: empty, qgroups: [] })` when quota
376/// accounting is not enabled (`ENOENT` from the kernel).
377pub fn qgroup_list(fd: BorrowedFd) -> nix::Result<QgroupList> {
378    // Build a map of qgroupid → builder as we walk the quota tree.
379    let mut builders: HashMap<u64, QgroupEntryBuilder> = HashMap::new();
380    let mut status_flags = QgroupStatusFlags::empty();
381
382    // Scan the quota tree for STATUS / INFO / LIMIT / RELATION items in one pass.
383    let quota_key = SearchKey {
384        tree_id: BTRFS_QUOTA_TREE_OBJECTID as u64,
385        min_objectid: 0,
386        max_objectid: u64::MAX,
387        min_type: BTRFS_QGROUP_STATUS_KEY as u32,
388        max_type: BTRFS_QGROUP_RELATION_KEY as u32,
389        min_offset: 0,
390        max_offset: u64::MAX,
391        min_transid: 0,
392        max_transid: u64::MAX,
393    };
394
395    let scan_result = tree_search(fd, quota_key, |hdr, data| {
396        match hdr.item_type as u32 {
397            t if t == BTRFS_QGROUP_STATUS_KEY as u32 => {
398                if let Some(raw) = parse_status_flags(data) {
399                    status_flags = QgroupStatusFlags::from_bits_truncate(raw);
400                }
401            }
402            t if t == BTRFS_QGROUP_INFO_KEY as u32 => {
403                // offset = qgroupid
404                let entry = builders.entry(hdr.offset).or_default();
405                parse_info(entry, data);
406            }
407            t if t == BTRFS_QGROUP_LIMIT_KEY as u32 => {
408                // offset = qgroupid
409                let entry = builders.entry(hdr.offset).or_default();
410                parse_limit(entry, data);
411            }
412            t if t == BTRFS_QGROUP_RELATION_KEY as u32 => {
413                // The kernel stores two entries per relation:
414                //   (child, RELATION_KEY, parent)
415                //   (parent, RELATION_KEY, child)
416                // Only process the canonical form where objectid > offset,
417                // i.e. parent > child.
418                if hdr.objectid > hdr.offset {
419                    let parent = hdr.objectid;
420                    let child = hdr.offset;
421                    builders.entry(child).or_default().parents.push(parent);
422                    builders.entry(parent).or_default().children.push(child);
423                }
424            }
425            _ => {}
426        }
427        Ok(())
428    });
429
430    match scan_result {
431        Err(Errno::ENOENT) => {
432            // Quota tree does not exist — quotas are disabled.
433            return Ok(QgroupList {
434                status_flags: QgroupStatusFlags::empty(),
435                qgroups: Vec::new(),
436            });
437        }
438        Err(e) => return Err(e),
439        Ok(()) => {}
440    }
441
442    // Collect existing subvolume IDs so we can mark stale level-0 qgroups.
443    let existing_subvol_ids = collect_subvol_ids(fd)?;
444
445    // Convert builders to QgroupInfo, computing stale flag for level-0 groups.
446    let mut qgroups: Vec<QgroupInfo> = builders
447        .into_iter()
448        .map(|(qgroupid, builder)| {
449            let stale = if qgroupid_level(qgroupid) == 0 {
450                !existing_subvol_ids.contains(&qgroupid_subvolid(qgroupid))
451            } else {
452                false
453            };
454            builder.build(qgroupid, stale)
455        })
456        .collect();
457
458    qgroups.sort_by_key(|q| q.qgroupid);
459
460    Ok(QgroupList {
461        status_flags,
462        qgroups,
463    })
464}
465
466/// Collect the set of all existing subvolume IDs by scanning
467/// `ROOT_ITEM_KEY` entries in the root tree.
468fn collect_subvol_ids(fd: BorrowedFd) -> nix::Result<HashSet<u64>> {
469    let mut ids: HashSet<u64> = HashSet::new();
470
471    // BTRFS_LAST_FREE_OBJECTID binds as i32 = -256; cast to u64 gives
472    // 0xFFFFFFFF_FFFFFF00 as expected.
473    let key = SearchKey::for_objectid_range(
474        BTRFS_ROOT_TREE_OBJECTID as u64,
475        BTRFS_ROOT_ITEM_KEY as u32,
476        BTRFS_FIRST_FREE_OBJECTID as u64,
477        BTRFS_LAST_FREE_OBJECTID as u64,
478    );
479
480    tree_search(fd, key, |hdr, _data| {
481        ids.insert(hdr.objectid);
482        Ok(())
483    })?;
484
485    Ok(ids)
486}
487
488/// Destroy all "stale" level-0 qgroups — those whose corresponding subvolume
489/// no longer exists.
490///
491/// In simple-quota mode (`SIMPLE_MODE` flag set), stale qgroups with non-zero
492/// `rfer` or `excl` are retained because they hold accounting information for
493/// dropped subvolumes.
494///
495/// Returns the number of qgroups successfully destroyed.
496pub fn qgroup_clear_stale(fd: BorrowedFd) -> nix::Result<usize> {
497    let list = qgroup_list(fd)?;
498    let simple_mode =
499        list.status_flags.contains(QgroupStatusFlags::SIMPLE_MODE);
500
501    let mut count = 0usize;
502
503    for qg in &list.qgroups {
504        // Only process level-0 stale qgroups.
505        if qgroupid_level(qg.qgroupid) != 0 || !qg.stale {
506            continue;
507        }
508
509        // In simple-quota mode, keep stale qgroups that still have usage data.
510        if simple_mode && (qg.rfer != 0 || qg.excl != 0) {
511            continue;
512        }
513
514        if qgroup_destroy(fd, qg.qgroupid).is_ok() {
515            count += 1;
516        }
517    }
518
519    Ok(count)
520}
521
#[cfg(test)]
mod tests {
    use super::*;

    /// Pack a level and a subvolume ID into a qgroup ID.
    fn pack(level: u64, subvolid: u64) -> u64 {
        (level << 48) | subvolid
    }

    #[test]
    fn qgroupid_level_zero() {
        for id in [5u64, 256] {
            assert_eq!(qgroupid_level(id), 0);
        }
    }

    #[test]
    fn qgroupid_level_nonzero() {
        for (level, subvolid) in [(1u64, 100u64), (3, 42)] {
            assert_eq!(qgroupid_level(pack(level, subvolid)), level as u16);
        }
    }

    #[test]
    fn qgroupid_subvolid_extracts_lower_48_bits() {
        let cases = [(256u64, 256u64), (pack(1, 100), 100), (pack(2, 0), 0)];
        for (packed, expected) in cases {
            assert_eq!(qgroupid_subvolid(packed), expected);
        }
    }

    #[test]
    fn qgroupid_roundtrip() {
        let (level, subvolid) = (2u64, 999u64);
        let packed = pack(level, subvolid);
        assert_eq!(qgroupid_level(packed), level as u16);
        assert_eq!(qgroupid_subvolid(packed), subvolid);
    }
}