Skip to main content

btrfs_uapi/
subvolume.rs

1//! # Subvolume and snapshot management: creating, deleting, and querying subvolumes
2//!
3//! Subvolumes are independently snapshotable subtrees within a btrfs filesystem.
4//! Snapshots are subvolumes created as copy-on-write clones of an existing
5//! subvolume.  This module covers the full lifecycle: creating and deleting
6//! subvolumes and snapshots, reading subvolume metadata and flags, listing all
7//! subvolumes in a filesystem, and getting or setting the default subvolume
8//! that is mounted when no subvolume is explicitly requested.
9
10use crate::{
11    field_size,
12    raw::{
13        BTRFS_DIR_ITEM_KEY, BTRFS_FIRST_FREE_OBJECTID, BTRFS_FS_TREE_OBJECTID,
14        BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_BACKREF_KEY, BTRFS_ROOT_ITEM_KEY,
15        BTRFS_ROOT_TREE_DIR_OBJECTID, BTRFS_ROOT_TREE_OBJECTID,
16        BTRFS_SUBVOL_QGROUP_INHERIT, BTRFS_SUBVOL_RDONLY,
17        BTRFS_SUBVOL_SPEC_BY_ID, btrfs_ioc_default_subvol,
18        btrfs_ioc_get_subvol_info, btrfs_ioc_ino_lookup,
19        btrfs_ioc_snap_create_v2, btrfs_ioc_snap_destroy_v2,
20        btrfs_ioc_subvol_create_v2, btrfs_ioc_subvol_getflags,
21        btrfs_ioc_subvol_setflags, btrfs_ioctl_get_subvol_info_args,
22        btrfs_ioctl_ino_lookup_args, btrfs_ioctl_vol_args_v2,
23        btrfs_qgroup_inherit, btrfs_root_item, btrfs_timespec,
24    },
25    tree_search::{SearchKey, tree_search},
26};
27use bitflags::bitflags;
28use nix::libc::c_char;
29use std::{
30    collections::HashMap,
31    ffi::CStr,
32    mem,
33    os::{fd::AsRawFd, unix::io::BorrowedFd},
34    time::{Duration, SystemTime, UNIX_EPOCH},
35};
36use uuid::Uuid;
37
38/// The top-level subvolume (FS tree); objectid 5, always present.
39///
40/// Returned by [`subvolume_default_get`] when no explicit default has been set.
41pub const FS_TREE_OBJECTID: u64 = BTRFS_FS_TREE_OBJECTID as u64;
42
43bitflags! {
44    /// Flags on a btrfs subvolume (the `flags` field of the root item /
45    /// `BTRFS_IOC_SUBVOL_{GET,SET}FLAGS`).
46    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
47    pub struct SubvolumeFlags: u64 {
48        /// Subvolume is read-only.
49        const RDONLY = 1 << 1;
50    }
51}
52
53impl std::fmt::Display for SubvolumeFlags {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        if self.contains(SubvolumeFlags::RDONLY) {
56            write!(f, "readonly")
57        } else {
58            write!(f, "-")
59        }
60    }
61}
62
63/// Subvolume metadata returned by `BTRFS_IOC_GET_SUBVOL_INFO`.
64#[derive(Debug, Clone)]
65pub struct SubvolumeInfo {
66    /// Root ID (subvolume ID) of this subvolume.
67    pub id: u64,
68    /// Name of this subvolume within its parent directory.
69    pub name: String,
70    /// Root ID of the parent subvolume.
71    pub parent_id: u64,
72    /// Inode number of the directory within the parent that holds this subvolume.
73    pub dir_id: u64,
74    /// Current generation of the subvolume.
75    pub generation: u64,
76    /// Subvolume flags (e.g. read-only).
77    pub flags: SubvolumeFlags,
78    /// UUID of this subvolume.
79    pub uuid: Uuid,
80    /// UUID of the parent subvolume (non-nil for snapshots).
81    pub parent_uuid: Uuid,
82    /// UUID of the received subvolume (non-nil for received snapshots).
83    pub received_uuid: Uuid,
84    /// Transaction ID when the subvolume was last changed.
85    pub ctransid: u64,
86    /// Transaction ID when the subvolume was created.
87    pub otransid: u64,
88    /// Transaction ID when a send was last performed.
89    pub stransid: u64,
90    /// Transaction ID when this subvolume was last received.
91    pub rtransid: u64,
92    /// Time of last change.
93    pub ctime: SystemTime,
94    /// Creation time.
95    pub otime: SystemTime,
96    /// Time of last send.
97    pub stime: SystemTime,
98    /// Time of last receive.
99    pub rtime: SystemTime,
100}
101
102/// A single subvolume entry returned by [`subvolume_list`].
103#[derive(Debug, Clone)]
104pub struct SubvolumeListItem {
105    /// Root ID (subvolume ID).
106    pub root_id: u64,
107    /// Root ID of the parent subvolume (`0` if unknown / not found).
108    pub parent_id: u64,
109    /// Inode of the directory in the parent that contains this subvolume.
110    pub dir_id: u64,
111    /// Current generation.
112    pub generation: u64,
113    /// Subvolume flags.
114    pub flags: SubvolumeFlags,
115    /// UUID of this subvolume.
116    pub uuid: Uuid,
117    /// UUID of the parent subvolume.
118    pub parent_uuid: Uuid,
119    /// UUID of the received subvolume.
120    pub received_uuid: Uuid,
121    /// Transaction ID when created.
122    pub otransid: u64,
123    /// Creation time.
124    pub otime: SystemTime,
125    /// Subvolume name (the leaf name within its parent).
126    ///
127    /// Full path resolution relative to the filesystem root requires
128    /// `BTRFS_IOC_INO_LOOKUP` and is not yet implemented; this field contains
129    /// only the leaf name as stored in the root backref item.
130    pub name: String,
131}
132
133/// Write `name` into the `name` union member of a zeroed
134/// `btrfs_ioctl_vol_args_v2`, returning `ENAMETOOLONG` if it does not fit.
135fn set_v2_name(
136    args: &mut btrfs_ioctl_vol_args_v2,
137    name: &CStr,
138) -> nix::Result<()> {
139    let bytes = name.to_bytes(); // excludes nul terminator
140    // SAFETY: name is the active union member; the struct is already zeroed so
141    // the implicit nul terminator is already present.
142    let name_buf: &mut [c_char] = unsafe { &mut args.__bindgen_anon_2.name };
143    if bytes.len() >= name_buf.len() {
144        return Err(nix::errno::Errno::ENAMETOOLONG);
145    }
146    for (i, &b) in bytes.iter().enumerate() {
147        name_buf[i] = b as c_char;
148    }
149    Ok(())
150}
151
152/// Build a `btrfs_qgroup_inherit` buffer for the given qgroup IDs.
153///
154/// The returned `Vec<u64>` is sized to hold the base struct plus the trailing
155/// `qgroups[]` array, with 8-byte alignment guaranteed by the `u64` element
156/// type.
157fn build_qgroup_inherit(qgroups: &[u64]) -> Vec<u64> {
158    let base_size = mem::size_of::<btrfs_qgroup_inherit>();
159    let total_size = base_size + qgroups.len() * mem::size_of::<u64>();
160    let num_u64 = (total_size + 7) / 8;
161    let mut buf = vec![0u64; num_u64];
162
163    // SAFETY: buf is large enough and zeroed; we write through a properly
164    // aligned pointer (btrfs_qgroup_inherit has 8-byte alignment).
165    let inherit =
166        unsafe { &mut *(buf.as_mut_ptr() as *mut btrfs_qgroup_inherit) };
167    inherit.num_qgroups = qgroups.len() as u64;
168
169    // Write the qgroup IDs into the flexible array member.
170    if !qgroups.is_empty() {
171        let array = unsafe { inherit.qgroups.as_mut_slice(qgroups.len()) };
172        array.copy_from_slice(qgroups);
173    }
174
175    buf
176}
177
178/// Set the `BTRFS_SUBVOL_QGROUP_INHERIT` fields on a `vol_args_v2` struct.
179///
180/// `buf` must be the buffer returned by `build_qgroup_inherit`.
181fn set_qgroup_inherit(
182    args: &mut btrfs_ioctl_vol_args_v2,
183    buf: &[u64],
184    num_qgroups: usize,
185) {
186    args.flags |= BTRFS_SUBVOL_QGROUP_INHERIT as u64;
187    let base_size = mem::size_of::<btrfs_qgroup_inherit>();
188    let total_size = base_size + num_qgroups * mem::size_of::<u64>();
189    args.__bindgen_anon_1.__bindgen_anon_1.size = total_size as u64;
190    args.__bindgen_anon_1.__bindgen_anon_1.qgroup_inherit =
191        buf.as_ptr() as *mut btrfs_qgroup_inherit;
192}
193
194/// Create a new subvolume named `name` inside the directory referred to by
195/// `parent_fd`.
196///
197/// `name` must be a plain leaf name (no slashes).  The caller is responsible
198/// for opening the correct parent directory.  If `qgroups` is non-empty, the
199/// new subvolume is added to those qgroups.  Requires `CAP_SYS_ADMIN`.
200pub fn subvolume_create(
201    parent_fd: BorrowedFd,
202    name: &CStr,
203    qgroups: &[u64],
204) -> nix::Result<()> {
205    let mut args: btrfs_ioctl_vol_args_v2 = unsafe { mem::zeroed() };
206    set_v2_name(&mut args, name)?;
207
208    let inherit_buf;
209    if !qgroups.is_empty() {
210        inherit_buf = build_qgroup_inherit(qgroups);
211        set_qgroup_inherit(&mut args, &inherit_buf, qgroups.len());
212    }
213
214    unsafe { btrfs_ioc_subvol_create_v2(parent_fd.as_raw_fd(), &args) }?;
215    Ok(())
216}
217
218/// Delete the subvolume or snapshot named `name` from the directory referred
219/// to by `parent_fd`.
220///
221/// `name` must be a plain leaf name (no slashes).  Requires `CAP_SYS_ADMIN`.
222pub fn subvolume_delete(parent_fd: BorrowedFd, name: &CStr) -> nix::Result<()> {
223    let mut args: btrfs_ioctl_vol_args_v2 = unsafe { mem::zeroed() };
224    set_v2_name(&mut args, name)?;
225    unsafe { btrfs_ioc_snap_destroy_v2(parent_fd.as_raw_fd(), &args) }?;
226    Ok(())
227}
228
229/// Delete a subvolume by its numeric subvolume ID.
230///
231/// `fd` must be an open file descriptor on the filesystem (typically the mount
232/// point).  Unlike `subvolume_delete`, this does not require knowing the
233/// subvolume's path.  Requires `CAP_SYS_ADMIN`.
234pub fn subvolume_delete_by_id(
235    fd: BorrowedFd,
236    subvolid: u64,
237) -> nix::Result<()> {
238    let mut args: btrfs_ioctl_vol_args_v2 = unsafe { mem::zeroed() };
239    args.flags = BTRFS_SUBVOL_SPEC_BY_ID as u64;
240    args.__bindgen_anon_2.subvolid = subvolid;
241    unsafe { btrfs_ioc_snap_destroy_v2(fd.as_raw_fd(), &args) }?;
242    Ok(())
243}
244
245/// Create a snapshot of the subvolume referred to by `source_fd`, placing it
246/// as `name` inside the directory referred to by `parent_fd`.
247///
248/// If `readonly` is `true` the new snapshot is created read-only.  If
249/// `qgroups` is non-empty, the new snapshot is added to those qgroups.
250/// Requires `CAP_SYS_ADMIN`.
251pub fn snapshot_create(
252    parent_fd: BorrowedFd,
253    source_fd: BorrowedFd,
254    name: &CStr,
255    readonly: bool,
256    qgroups: &[u64],
257) -> nix::Result<()> {
258    let mut args: btrfs_ioctl_vol_args_v2 = unsafe { mem::zeroed() };
259    // The `fd` field carries the source subvolume file descriptor.
260    args.fd = source_fd.as_raw_fd() as i64;
261    if readonly {
262        args.flags = BTRFS_SUBVOL_RDONLY as u64;
263    }
264    set_v2_name(&mut args, name)?;
265
266    let inherit_buf;
267    if !qgroups.is_empty() {
268        inherit_buf = build_qgroup_inherit(qgroups);
269        set_qgroup_inherit(&mut args, &inherit_buf, qgroups.len());
270    }
271
272    unsafe { btrfs_ioc_snap_create_v2(parent_fd.as_raw_fd(), &args) }?;
273    Ok(())
274}
275
276/// Query detailed information about the subvolume that `fd` belongs to.
277///
278/// `fd` can be any file or directory within the target subvolume.
279/// Does not require elevated privileges.
280pub fn subvolume_info(fd: BorrowedFd) -> nix::Result<SubvolumeInfo> {
281    subvolume_info_by_id(fd, 0)
282}
283
284/// Query detailed information about a subvolume by its numeric root ID.
285///
286/// `fd` can be any open file descriptor on the filesystem.  If `rootid` is 0,
287/// the subvolume that `fd` belongs to is queried (equivalent to
288/// `subvolume_info`).  Does not require elevated privileges.
289pub fn subvolume_info_by_id(
290    fd: BorrowedFd,
291    rootid: u64,
292) -> nix::Result<SubvolumeInfo> {
293    let mut raw: btrfs_ioctl_get_subvol_info_args = unsafe { mem::zeroed() };
294    raw.treeid = rootid;
295    unsafe { btrfs_ioc_get_subvol_info(fd.as_raw_fd(), &mut raw) }?;
296
297    let name = unsafe { CStr::from_ptr(raw.name.as_ptr()) }
298        .to_string_lossy()
299        .into_owned();
300
301    Ok(SubvolumeInfo {
302        id: raw.treeid,
303        name,
304        parent_id: raw.parent_id,
305        dir_id: raw.dirid,
306        generation: raw.generation,
307        flags: SubvolumeFlags::from_bits_truncate(raw.flags),
308        uuid: Uuid::from_bytes(raw.uuid),
309        parent_uuid: Uuid::from_bytes(raw.parent_uuid),
310        received_uuid: Uuid::from_bytes(raw.received_uuid),
311        ctransid: raw.ctransid,
312        otransid: raw.otransid,
313        stransid: raw.stransid,
314        rtransid: raw.rtransid,
315        ctime: ioctl_timespec_to_system_time(raw.ctime.sec, raw.ctime.nsec),
316        otime: ioctl_timespec_to_system_time(raw.otime.sec, raw.otime.nsec),
317        stime: ioctl_timespec_to_system_time(raw.stime.sec, raw.stime.nsec),
318        rtime: ioctl_timespec_to_system_time(raw.rtime.sec, raw.rtime.nsec),
319    })
320}
321
322/// Read the flags of the subvolume that `fd` belongs to.
323pub fn subvolume_flags_get(fd: BorrowedFd) -> nix::Result<SubvolumeFlags> {
324    let mut flags: u64 = 0;
325    unsafe { btrfs_ioc_subvol_getflags(fd.as_raw_fd(), &mut flags) }?;
326    Ok(SubvolumeFlags::from_bits_truncate(flags))
327}
328
329/// Set the flags of the subvolume that `fd` belongs to.
330///
331/// Requires `CAP_SYS_ADMIN` to make a subvolume read-only; any user may
332/// clear the read-only flag from a subvolume they own.
333pub fn subvolume_flags_set(
334    fd: BorrowedFd,
335    flags: SubvolumeFlags,
336) -> nix::Result<()> {
337    let raw: u64 = flags.bits();
338    unsafe { btrfs_ioc_subvol_setflags(fd.as_raw_fd(), &raw) }?;
339    Ok(())
340}
341
342/// Query the ID of the default subvolume of the filesystem referred to by
343/// `fd`.
344///
345/// Searches the root tree for the `BTRFS_DIR_ITEM_KEY` entry at objectid
346/// `BTRFS_ROOT_TREE_DIR_OBJECTID` that stores the default subvolume ID.
347/// Returns [`FS_TREE_OBJECTID`] if no default has been set.
348///
349/// Requires `CAP_SYS_ADMIN`.
350pub fn subvolume_default_get(fd: BorrowedFd) -> nix::Result<u64> {
351    let mut default_id: Option<u64> = None;
352
353    tree_search(
354        fd,
355        SearchKey::for_objectid_range(
356            BTRFS_ROOT_TREE_OBJECTID as u64,
357            BTRFS_DIR_ITEM_KEY as u32,
358            BTRFS_ROOT_TREE_DIR_OBJECTID as u64,
359            BTRFS_ROOT_TREE_DIR_OBJECTID as u64,
360        ),
361        |_hdr, data| {
362            use crate::raw::btrfs_dir_item;
363            use std::mem::{offset_of, size_of};
364
365            let header_size = size_of::<btrfs_dir_item>();
366            if data.len() < header_size {
367                return Ok(());
368            }
369            let name_off = offset_of!(btrfs_dir_item, name_len);
370            let name_len =
371                u16::from_le_bytes([data[name_off], data[name_off + 1]])
372                    as usize;
373            if data.len() < header_size + name_len {
374                return Ok(());
375            }
376            let item_name = &data[header_size..header_size + name_len];
377            if item_name == b"default" {
378                let loc_off = offset_of!(btrfs_dir_item, location);
379                let target_id = u64::from_le_bytes(
380                    data[loc_off..loc_off + 8].try_into().unwrap(),
381                );
382                default_id = Some(target_id);
383            }
384            Ok(())
385        },
386    )?;
387
388    Ok(default_id.unwrap_or(BTRFS_FS_TREE_OBJECTID as u64))
389}
390
391/// Set the default subvolume of the filesystem referred to by `fd` to
392/// `subvolid`.
393///
394/// Pass [`FS_TREE_OBJECTID`] to restore the default.  Requires `CAP_SYS_ADMIN`.
395pub fn subvolume_default_set(fd: BorrowedFd, subvolid: u64) -> nix::Result<()> {
396    unsafe { btrfs_ioc_default_subvol(fd.as_raw_fd(), &subvolid) }?;
397    Ok(())
398}
399
400/// List all user subvolumes and snapshots in the filesystem referred to by
401/// `fd` by walking the root tree.
402///
403/// Two tree-search passes are made:
404/// 1. `ROOT_ITEM_KEY` — reads each subvolume's metadata (generation, flags,
405///    UUIDs, creation time).
406/// 2. `BTRFS_ROOT_BACKREF_KEY` — reads each subvolume's parent ID and leaf name.
407///
408/// Subvolumes for which no backref is found are still included; their
409/// `parent_id`, `dir_id`, and `name` will be zeroed / empty.
410///
411/// Requires `CAP_SYS_ADMIN`.
412pub fn subvolume_list(fd: BorrowedFd) -> nix::Result<Vec<SubvolumeListItem>> {
413    let mut items: Vec<SubvolumeListItem> = Vec::new();
414
415    tree_search(
416        fd,
417        SearchKey::for_objectid_range(
418            BTRFS_ROOT_TREE_OBJECTID as u64,
419            BTRFS_ROOT_ITEM_KEY as u32,
420            BTRFS_FIRST_FREE_OBJECTID as u64,
421            BTRFS_LAST_FREE_OBJECTID as u64,
422        ),
423        |hdr, data| {
424            if let Some(item) = parse_root_item(hdr.objectid, data) {
425                items.push(item);
426            }
427            Ok(())
428        },
429    )?;
430
431    tree_search(
432        fd,
433        SearchKey::for_objectid_range(
434            BTRFS_ROOT_TREE_OBJECTID as u64,
435            BTRFS_ROOT_BACKREF_KEY as u32,
436            BTRFS_FIRST_FREE_OBJECTID as u64,
437            BTRFS_LAST_FREE_OBJECTID as u64,
438        ),
439        |hdr, data| {
440            // ROOT_BACKREF_KEY: objectid = subvol root_id, offset = parent root_id
441            let root_id = hdr.objectid;
442            let parent_id = hdr.offset;
443
444            if let Some(item) = items.iter_mut().find(|i| i.root_id == root_id)
445            {
446                // Only take the first ROOT_BACKREF for each subvolume.  A
447                // subvolume can have multiple ROOT_BACKREF entries when it is
448                // referenced from more than one parent (e.g. the subvolume
449                // also appears as a snapshot inside another subvolume).
450                // Items are returned in offset-ascending order, so the first
451                // entry has the smallest parent_id — which is the canonical
452                // location btrfs-progs uses for "top level" and path.
453                if item.parent_id == 0 {
454                    item.parent_id = parent_id;
455                    if let Some((dir_id, name)) = parse_root_ref(data) {
456                        item.dir_id = dir_id;
457                        item.name = name;
458                    }
459                }
460            }
461            Ok(())
462        },
463    )?;
464
465    // Determine which subvolume the fd is open on.  Paths are expressed
466    // relative to this subvolume, matching btrfs-progs behaviour.
467    let top_id =
468        crate::inode::lookup_path_rootid(fd).unwrap_or(FS_TREE_OBJECTID);
469
470    resolve_full_paths(fd, &mut items, top_id)?;
471
472    Ok(items)
473}
474
475/// Call `BTRFS_IOC_INO_LOOKUP` for `dir_id` within `parent_tree` and return
476/// the path from that tree's root to the directory.
477///
478/// The kernel returns the path with a trailing `/` when the directory is not
479/// the tree root, and an empty string when `dir_id` is the tree root itself.
480/// This prefix can be concatenated directly with the subvolume's leaf name to
481/// form the full segment within the parent.
482fn ino_lookup_dir_path(
483    fd: BorrowedFd,
484    parent_tree: u64,
485    dir_id: u64,
486) -> nix::Result<String> {
487    let mut args = btrfs_ioctl_ino_lookup_args {
488        treeid: parent_tree,
489        objectid: dir_id,
490        ..unsafe { mem::zeroed() }
491    };
492    // SAFETY: args is a valid, zeroed btrfs_ioctl_ino_lookup_args; the ioctl
493    // fills in args.name with a null-terminated path string.
494    unsafe { btrfs_ioc_ino_lookup(fd.as_raw_fd(), &mut args) }?;
495
496    // args.name is [c_char; 4080]; find the null terminator and decode.
497    let name_ptr: *const c_char = args.name.as_ptr();
498    // SAFETY: the ioctl guarantees null termination within the 4080-byte buffer.
499    let cstr = unsafe { CStr::from_ptr(name_ptr) };
500    Ok(cstr.to_string_lossy().into_owned())
501}
502
503/// Resolve the `name` field of every item in `items` from a bare leaf name to
504/// the full path relative to the filesystem root.
505///
506/// For each subvolume we already have `parent_id`, `dir_id`, and the leaf name
507/// from the ROOT_BACKREF pass.  A single `BTRFS_IOC_INO_LOOKUP` call per item
508/// gives the path from the parent tree's root down to the directory that
509/// contains the subvolume (the "dir prefix").  Concatenating that prefix with
510/// the leaf name yields the subvolume's segment within its parent.  Walking up
511/// the parent chain (using the in-memory items map) and joining segments with
512/// `/` gives the final full path.
513fn resolve_full_paths(
514    fd: BorrowedFd,
515    items: &mut Vec<SubvolumeListItem>,
516    top_id: u64,
517) -> nix::Result<()> {
518    // Map root_id → index for O(1) parent lookups.
519    let id_to_idx: HashMap<u64, usize> = items
520        .iter()
521        .enumerate()
522        .map(|(i, item)| (item.root_id, i))
523        .collect();
524
525    // Compute the "segment" for each item: the path of this subvolume within
526    // its immediate parent (dir prefix from INO_LOOKUP + leaf name).
527    // If INO_LOOKUP is not available (e.g. missing CAP_SYS_ADMIN), fall back
528    // to the bare leaf name so the list still works.
529    let segments: Vec<String> = items
530        .iter()
531        .map(|item| {
532            if item.parent_id == 0 || item.name.is_empty() {
533                return item.name.clone();
534            }
535            match ino_lookup_dir_path(fd, item.parent_id, item.dir_id) {
536                Ok(prefix) => format!("{}{}", prefix, item.name),
537                Err(_) => item.name.clone(),
538            }
539        })
540        .collect();
541
542    // Walk each item's parent chain, joining segments, and cache results so
543    // every ancestor is resolved at most once.
544    let mut full_paths: HashMap<u64, String> = HashMap::new();
545    let root_ids: Vec<u64> = items.iter().map(|i| i.root_id).collect();
546    for root_id in root_ids {
547        build_full_path(
548            root_id,
549            top_id,
550            &id_to_idx,
551            &segments,
552            items,
553            &mut full_paths,
554        );
555    }
556
557    for item in items.iter_mut() {
558        if let Some(path) = full_paths.remove(&item.root_id) {
559            item.name = path;
560        }
561    }
562
563    Ok(())
564}
565
566/// Compute the full path for `root_id`, memoizing into `cache`.
567///
568/// Walks the ancestor chain iteratively to avoid stack overflow on deep
569/// subvolume trees.  Collects segments from the target up to the FS tree
570/// root, then joins them in reverse order.
571///
572/// Cycle detection is included: ROOT_BACKREF entries can form mutual parent
573/// relationships (e.g. a snapshot stored inside the subvolume it was taken
574/// from), which would otherwise loop forever.
575fn build_full_path(
576    root_id: u64,
577    top_id: u64,
578    id_to_idx: &HashMap<u64, usize>,
579    segments: &[String],
580    items: &[SubvolumeListItem],
581    cache: &mut HashMap<u64, String>,
582) -> String {
583    // Collect the chain of root_ids from `root_id` up to the top subvolume
584    // (the one the fd is open on) or the FS tree root, stopping early if we
585    // hit an already-cached ancestor or a cycle.
586    let mut chain: Vec<u64> = Vec::new();
587    let mut visited: HashMap<u64, usize> = HashMap::new();
588    let mut cur = root_id;
589    loop {
590        if cache.contains_key(&cur) {
591            break;
592        }
593        if visited.contains_key(&cur) {
594            // Cycle detected: truncate the chain back to where the cycle
595            // starts so we don't join nonsensical repeated segments.
596            let cycle_start = visited[&cur];
597            chain.truncate(cycle_start);
598            break;
599        }
600        let Some(&idx) = id_to_idx.get(&cur) else {
601            break;
602        };
603        visited.insert(cur, chain.len());
604        chain.push(cur);
605        let parent = items[idx].parent_id;
606        if parent == 0
607            || parent == FS_TREE_OBJECTID
608            || parent == top_id
609            || !id_to_idx.contains_key(&parent)
610        {
611            break;
612        }
613        cur = parent;
614    }
615
616    // Resolve each node in the chain from root toward the target, building
617    // on any already-cached prefix we stopped at.
618    for &id in chain.iter().rev() {
619        let Some(&idx) = id_to_idx.get(&id) else {
620            cache.insert(id, String::new());
621            continue;
622        };
623        let segment = &segments[idx];
624        let parent_id = items[idx].parent_id;
625
626        let full_path = if parent_id == 0
627            || parent_id == FS_TREE_OBJECTID
628            || parent_id == top_id
629            || !id_to_idx.contains_key(&parent_id)
630        {
631            segment.clone()
632        } else if let Some(parent_path) = cache.get(&parent_id) {
633            if parent_path.is_empty() {
634                segment.clone()
635            } else {
636                format!("{}/{}", parent_path, segment)
637            }
638        } else {
639            segment.clone()
640        };
641
642        cache.insert(id, full_path);
643    }
644
645    cache.get(&root_id).cloned().unwrap_or_default()
646}
647
648/// `btrfs_root_item` field offsets (packed, LE).
649fn parse_root_item(root_id: u64, data: &[u8]) -> Option<SubvolumeListItem> {
650    use std::mem::offset_of;
651
652    // Items shorter than generation_v2 are "legacy" and do not carry
653    // UUID / otime / otransid fields.
654    let legacy_boundary = offset_of!(btrfs_root_item, generation_v2);
655    if data.len() < legacy_boundary {
656        return None;
657    }
658
659    let generation = rle64(data, offset_of!(btrfs_root_item, generation));
660    let flags_raw = rle64(data, offset_of!(btrfs_root_item, flags));
661    let flags = SubvolumeFlags::from_bits_truncate(flags_raw);
662
663    // Extended fields exist only in non-legacy items.
664    let otime_nsec =
665        offset_of!(btrfs_root_item, otime) + offset_of!(btrfs_timespec, nsec);
666    let (uuid, parent_uuid, received_uuid, otransid, otime) =
667        if data.len() >= otime_nsec + field_size!(btrfs_timespec, nsec) {
668            let off_uuid = offset_of!(btrfs_root_item, uuid);
669            let off_parent = offset_of!(btrfs_root_item, parent_uuid);
670            let off_received = offset_of!(btrfs_root_item, received_uuid);
671            let uuid_size = field_size!(btrfs_root_item, uuid);
672            let uuid = Uuid::from_bytes(
673                data[off_uuid..off_uuid + uuid_size].try_into().unwrap(),
674            );
675            let parent_uuid = Uuid::from_bytes(
676                data[off_parent..off_parent + uuid_size].try_into().unwrap(),
677            );
678            let received_uuid = Uuid::from_bytes(
679                data[off_received..off_received + uuid_size]
680                    .try_into()
681                    .unwrap(),
682            );
683            let otransid = rle64(data, offset_of!(btrfs_root_item, otransid));
684            let otime_sec = offset_of!(btrfs_root_item, otime);
685            let otime = timespec_to_system_time(
686                rle64(data, otime_sec),
687                rle32(data, otime_nsec),
688            );
689            (uuid, parent_uuid, received_uuid, otransid, otime)
690        } else {
691            (Uuid::nil(), Uuid::nil(), Uuid::nil(), 0, UNIX_EPOCH)
692        };
693
694    Some(SubvolumeListItem {
695        root_id,
696        parent_id: 0,
697        dir_id: 0,
698        generation,
699        flags,
700        uuid,
701        parent_uuid,
702        received_uuid,
703        otransid,
704        otime,
705        name: String::new(),
706    })
707}
708
709/// Parse a `btrfs_root_ref` payload (packed, LE). The name immediately
710/// follows the fixed-size header.
711fn parse_root_ref(data: &[u8]) -> Option<(u64, String)> {
712    use crate::raw::btrfs_root_ref;
713    use std::mem::{offset_of, size_of};
714
715    let header_size = size_of::<btrfs_root_ref>();
716    if data.len() < header_size {
717        return None;
718    }
719    let dir_id = rle64(data, offset_of!(btrfs_root_ref, dirid));
720    let name_off = offset_of!(btrfs_root_ref, name_len);
721    let name_len =
722        u16::from_le_bytes([data[name_off], data[name_off + 1]]) as usize;
723    if data.len() < header_size + name_len {
724        return None;
725    }
726    let name =
727        String::from_utf8_lossy(&data[header_size..header_size + name_len])
728            .into_owned();
729    Some((dir_id, name))
730}
731
732#[inline]
733fn rle64(buf: &[u8], off: usize) -> u64 {
734    u64::from_le_bytes(buf[off..off + 8].try_into().unwrap())
735}
736
737#[inline]
738fn rle32(buf: &[u8], off: usize) -> u32 {
739    u32::from_le_bytes(buf[off..off + 4].try_into().unwrap())
740}
741
742/// Convert an on-disk `btrfs_timespec` (LE sec + LE nsec, packed) to
743/// [`SystemTime`].  Returns [`UNIX_EPOCH`] if sec == 0.
744fn timespec_to_system_time(sec: u64, nsec: u32) -> SystemTime {
745    if sec == 0 {
746        return UNIX_EPOCH;
747    }
748    UNIX_EPOCH + Duration::new(sec, nsec)
749}
750
751/// Convert a `btrfs_ioctl_timespec` (host byte order, with padding) to
752/// [`SystemTime`].  Returns [`UNIX_EPOCH`] if sec == 0.
753fn ioctl_timespec_to_system_time(sec: u64, nsec: u32) -> SystemTime {
754    if sec == 0 {
755        return UNIX_EPOCH;
756    }
757    UNIX_EPOCH + Duration::new(sec, nsec)
758}
759
760#[cfg(test)]
761mod tests {
762    use super::*;
763    use std::{
764        collections::HashMap,
765        time::{Duration, UNIX_EPOCH},
766    };
767    use uuid::Uuid;
768
769    fn test_item(root_id: u64, parent_id: u64) -> SubvolumeListItem {
770        SubvolumeListItem {
771            root_id,
772            parent_id,
773            dir_id: 0,
774            generation: 0,
775            flags: SubvolumeFlags::empty(),
776            uuid: Uuid::nil(),
777            parent_uuid: Uuid::nil(),
778            received_uuid: Uuid::nil(),
779            otransid: 0,
780            otime: UNIX_EPOCH,
781            name: String::new(),
782        }
783    }
784
785    #[test]
786    fn rle64_reads_little_endian() {
787        let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
788        assert_eq!(rle64(&buf, 0), 0x0807060504030201);
789    }
790
791    #[test]
792    fn rle64_at_offset() {
793        let buf = [0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
794        assert_eq!(rle64(&buf, 2), 1);
795    }
796
797    #[test]
798    fn rle32_reads_little_endian() {
799        let buf = [0x78, 0x56, 0x34, 0x12];
800        assert_eq!(rle32(&buf, 0), 0x12345678);
801    }
802
803    #[test]
804    fn rle32_at_offset() {
805        let buf = [0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
806        assert_eq!(rle32(&buf, 2), 1);
807    }
808
809    #[test]
810    fn timespec_zero_returns_epoch() {
811        assert_eq!(timespec_to_system_time(0, 0), UNIX_EPOCH);
812    }
813
814    #[test]
815    fn timespec_zero_sec_with_nonzero_nsec_returns_epoch() {
816        // sec==0 triggers the early return regardless of nsec
817        assert_eq!(timespec_to_system_time(0, 500_000_000), UNIX_EPOCH);
818    }
819
820    #[test]
821    fn timespec_nonzero_returns_correct_time() {
822        let t = timespec_to_system_time(1000, 500);
823        assert_eq!(t, UNIX_EPOCH + Duration::new(1000, 500));
824    }
825
826    #[test]
827    fn subvolume_flags_display_readonly() {
828        let flags = SubvolumeFlags::RDONLY;
829        assert_eq!(format!("{}", flags), "readonly");
830    }
831
832    #[test]
833    fn subvolume_flags_display_empty() {
834        let flags = SubvolumeFlags::empty();
835        assert_eq!(format!("{}", flags), "-");
836    }
837
838    #[test]
839    fn parse_root_ref_valid() {
840        // btrfs_root_ref: dirid (8 LE) + sequence (8 LE) + name_len (2 LE) + name bytes
841        let name = b"mysubvol";
842        let mut buf = Vec::new();
843        buf.extend_from_slice(&42u64.to_le_bytes()); // dirid
844        buf.extend_from_slice(&1u64.to_le_bytes()); // sequence
845        buf.extend_from_slice(&(name.len() as u16).to_le_bytes()); // name_len
846        buf.extend_from_slice(name);
847
848        let result = parse_root_ref(&buf);
849        assert!(result.is_some());
850        let (dir_id, parsed_name) = result.unwrap();
851        assert_eq!(dir_id, 42);
852        assert_eq!(parsed_name, "mysubvol");
853    }
854
855    #[test]
856    fn parse_root_ref_too_short_header() {
857        // Less than 18 bytes (sizeof btrfs_root_ref)
858        let buf = [0u8; 10];
859        assert!(parse_root_ref(&buf).is_none());
860    }
861
862    #[test]
863    fn parse_root_ref_too_short_name() {
864        // Header claims 10-byte name but buffer only has the header
865        let mut buf = vec![0u8; 18];
866        // Set name_len = 10 at offset 16
867        buf[16] = 10;
868        buf[17] = 0;
869        assert!(parse_root_ref(&buf).is_none());
870    }
871
872    #[test]
873    fn parse_root_ref_empty_name() {
874        let mut buf = Vec::new();
875        buf.extend_from_slice(&100u64.to_le_bytes()); // dirid
876        buf.extend_from_slice(&0u64.to_le_bytes()); // sequence
877        buf.extend_from_slice(&0u16.to_le_bytes()); // name_len = 0
878
879        let result = parse_root_ref(&buf);
880        assert!(result.is_some());
881        let (dir_id, parsed_name) = result.unwrap();
882        assert_eq!(dir_id, 100);
883        assert_eq!(parsed_name, "");
884    }
885
886    #[test]
887    fn build_full_path_single_subvol_parent_fs_tree() {
888        // Subvolume 256, parent is FS_TREE (5)
889        let items = vec![test_item(256, FS_TREE_OBJECTID)];
890        let segments = vec!["mysub".to_string()];
891        let id_to_idx: HashMap<u64, usize> = [(256, 0)].into();
892        let mut cache = HashMap::new();
893
894        let path = build_full_path(
895            256,
896            FS_TREE_OBJECTID,
897            &id_to_idx,
898            &segments,
899            &items,
900            &mut cache,
901        );
902        assert_eq!(path, "mysub");
903    }
904
905    #[test]
906    fn build_full_path_nested_chain() {
907        // A (256) -> B (257) -> C (258), all parented under FS_TREE
908        let items = vec![
909            test_item(256, FS_TREE_OBJECTID),
910            test_item(257, 256),
911            test_item(258, 257),
912        ];
913        let segments = vec!["A".to_string(), "B".to_string(), "C".to_string()];
914        let id_to_idx: HashMap<u64, usize> =
915            [(256, 0), (257, 1), (258, 2)].into();
916        let mut cache = HashMap::new();
917
918        let path = build_full_path(
919            258,
920            FS_TREE_OBJECTID,
921            &id_to_idx,
922            &segments,
923            &items,
924            &mut cache,
925        );
926        assert_eq!(path, "A/B/C");
927    }
928
929    #[test]
930    fn build_full_path_stops_at_top_id() {
931        // A (256) -> B (257) -> C (258), top_id = 257 (B)
932        // Paths are relative to top_id, so C's parent (257) == top_id means
933        // C's path is just its own segment, not "A/B/C".
934        let items = vec![
935            test_item(256, FS_TREE_OBJECTID),
936            test_item(257, 256),
937            test_item(258, 257),
938        ];
939        let segments = vec!["A".to_string(), "B".to_string(), "C".to_string()];
940        let id_to_idx: HashMap<u64, usize> =
941            [(256, 0), (257, 1), (258, 2)].into();
942        let mut cache = HashMap::new();
943
944        let path = build_full_path(
945            258, 257, &id_to_idx, &segments, &items, &mut cache,
946        );
947        assert_eq!(path, "C");
948
949        // B's path is also just "B" (its parent 256/A is below top_id in the
950        // tree, but B's own parent is not top_id — A's parent is FS_TREE).
951        // With top_id=257, building B: parent=256, 256 is in id_to_idx but
952        // 256's parent is FS_TREE (5) which triggers the stop, so chain = [257, 256],
953        // and A gets its segment, B gets "A/B".
954        let path_b = build_full_path(
955            257, 257, &id_to_idx, &segments, &items, &mut cache,
956        );
957        // 257 itself: its parent is 256, 256 != top_id (257), so we walk up.
958        // 256's parent is FS_TREE (5), which triggers stop. chain = [257, 256].
959        // 256 resolves to "A" (parent is FS_TREE), 257 resolves to "A/B".
960        assert_eq!(path_b, "A/B");
961    }
962
963    #[test]
964    fn build_full_path_cycle_detection() {
965        // A (256) parent is B (257), B (257) parent is A (256) — mutual cycle
966        let items = vec![test_item(256, 257), test_item(257, 256)];
967        let segments = vec!["A".to_string(), "B".to_string()];
968        let id_to_idx: HashMap<u64, usize> = [(256, 0), (257, 1)].into();
969        let mut cache = HashMap::new();
970
971        // Must not hang. The result is truncated due to cycle detection.
972        let _path = build_full_path(
973            256,
974            FS_TREE_OBJECTID,
975            &id_to_idx,
976            &segments,
977            &items,
978            &mut cache,
979        );
980        // Just verify it terminates and returns something (exact value depends
981        // on cycle truncation heuristic).
982    }
983
984    #[test]
985    fn build_full_path_cached_ancestor() {
986        // A (256) -> B (257) -> C (258)
987        // Pre-cache B's path; building C should use it.
988        let items = vec![
989            test_item(256, FS_TREE_OBJECTID),
990            test_item(257, 256),
991            test_item(258, 257),
992        ];
993        let segments = vec!["A".to_string(), "B".to_string(), "C".to_string()];
994        let id_to_idx: HashMap<u64, usize> =
995            [(256, 0), (257, 1), (258, 2)].into();
996        let mut cache = HashMap::new();
997        cache.insert(257, "A/B".to_string());
998
999        let path = build_full_path(
1000            258,
1001            FS_TREE_OBJECTID,
1002            &id_to_idx,
1003            &segments,
1004            &items,
1005            &mut cache,
1006        );
1007        assert_eq!(path, "A/B/C");
1008    }
1009
1010    #[test]
1011    fn build_full_path_unknown_parent() {
1012        // Subvolume 256, parent 999 not in id_to_idx
1013        let items = vec![test_item(256, 999)];
1014        let segments = vec!["orphan".to_string()];
1015        let id_to_idx: HashMap<u64, usize> = [(256, 0)].into();
1016        let mut cache = HashMap::new();
1017
1018        let path = build_full_path(
1019            256,
1020            FS_TREE_OBJECTID,
1021            &id_to_idx,
1022            &segments,
1023            &items,
1024            &mut cache,
1025        );
1026        assert_eq!(path, "orphan");
1027    }
1028
1029    #[test]
1030    fn build_full_path_parent_id_zero() {
1031        // Subvolume 256, parent_id == 0 (no backref found)
1032        let items = vec![test_item(256, 0)];
1033        let segments = vec!["noparent".to_string()];
1034        let id_to_idx: HashMap<u64, usize> = [(256, 0)].into();
1035        let mut cache = HashMap::new();
1036
1037        let path = build_full_path(
1038            256,
1039            FS_TREE_OBJECTID,
1040            &id_to_idx,
1041            &segments,
1042            &items,
1043            &mut cache,
1044        );
1045        assert_eq!(path, "noparent");
1046    }
1047
1048    #[test]
1049    fn build_full_path_already_cached_target() {
1050        // If the target itself is already cached, return the cached value.
1051        let items = vec![test_item(256, FS_TREE_OBJECTID)];
1052        let segments = vec!["A".to_string()];
1053        let id_to_idx: HashMap<u64, usize> = [(256, 0)].into();
1054        let mut cache = HashMap::new();
1055        cache.insert(256, "cached/path".to_string());
1056
1057        let path = build_full_path(
1058            256,
1059            FS_TREE_OBJECTID,
1060            &id_to_idx,
1061            &segments,
1062            &items,
1063            &mut cache,
1064        );
1065        assert_eq!(path, "cached/path");
1066    }
1067
1068    #[test]
1069    fn build_full_path_root_id_not_in_items() {
1070        // root_id not present in id_to_idx at all
1071        let items = vec![test_item(256, FS_TREE_OBJECTID)];
1072        let segments = vec!["A".to_string()];
1073        let id_to_idx: HashMap<u64, usize> = [(256, 0)].into();
1074        let mut cache = HashMap::new();
1075
1076        let path = build_full_path(
1077            999,
1078            FS_TREE_OBJECTID,
1079            &id_to_idx,
1080            &segments,
1081            &items,
1082            &mut cache,
1083        );
1084        assert_eq!(path, "");
1085    }
1086}