Skip to main content

btrfs_uapi/
fiemap.rs

1//! # Physical extent layout of files via `FS_IOC_FIEMAP`
2//!
3//! `FS_IOC_FIEMAP` is a standard VFS ioctl (not btrfs-specific) that reports
4//! the physical extents backing a file.  It is the mechanism `btrfs filesystem
5//! du` uses to determine how much disk space each file occupies and which
6//! extents are shared with other files.
7
8use nix::libc;
9use std::os::unix::io::BorrowedFd;
10
11// FS_IOC_FIEMAP = _IOWR('f', 11, struct fiemap)
12// struct fiemap (without flexible array member) = 32 bytes
13// On 64-bit Linux: (3 << 30) | (32 << 16) | (0x66 << 8) | 11 = 0xC020_660B
14const FS_IOC_FIEMAP: libc::Ioctl = 0xC020_660Bu32 as libc::Ioctl;
15
// Byte offsets of the fiemap header fields (all native byte order).  Each
// offset is the previous field's offset plus that field's width in bytes.
const FM_START: usize = 0; // u64 — logical offset to start from
const FM_LENGTH: usize = FM_START + 8; // u64 — logical length to map
const FM_FLAGS: usize = FM_LENGTH + 8; // u32 — request flags
const FM_MAPPED: usize = FM_FLAGS + 4; // u32 — out: number of extents returned
const FM_COUNT: usize = FM_MAPPED + 4; // u32 — in:  number of extent slots
22
// Byte offsets of the fiemap_extent fields within one 56-byte slot, chained
// from the preceding field's offset and width.
const FE_LOGICAL: usize = 0; // u64
const FE_PHYSICAL: usize = FE_LOGICAL + 8; // u64
const FE_LENGTH: usize = FE_PHYSICAL + 8; // u64
// fe_reserved64[2] occupies the 16 bytes at 24..40, between length and flags.
const FE_FLAGS: usize = FE_LENGTH + 8 + 2 * 8; // u32

// Total size in bytes of one extent slot.
const FE_SIZE: usize = 7 * 8;
31
// Per-extent flag bits reported in `fe_flags`, written as bit positions.
/// Marks the final extent of the file; the walk stops after it.
const FIEMAP_EXTENT_LAST: u32 = 1 << 0;
/// The extent's location is unknown.
const FIEMAP_EXTENT_UNKNOWN: u32 = 1 << 1;
/// The extent has not been written out yet.
const FIEMAP_EXTENT_DELALLOC: u32 = 1 << 2;
/// The extent's data is stored inline in metadata.
const FIEMAP_EXTENT_DATA_INLINE: u32 = 1 << 9;
/// The extent is shared with other files.
const FIEMAP_EXTENT_SHARED: u32 = 1 << 13;

/// Union of the flags marking extents whose bytes are not counted: unknown
/// location, not-yet-written, or stored inline in metadata.
const SKIP_FLAGS: u32 =
    FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN;
42
/// Number of extent slots to request per ioctl call.
///
/// `file_extents` sizes its request buffer from this value (a 32-byte header
/// plus one `FE_SIZE`-byte slot per extent) and writes it into the header's
/// extent-slot count before every call, so files with more extents than this
/// are walked in several batches.
const EXTENTS_PER_BATCH: u32 = 256;
45
/// Summary of the physical extent usage of a single file.
///
/// Produced by [`file_extents`].  Two summaries compare equal when all three
/// fields are equal, including the order of `shared_extents`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FileExtentInfo {
    /// Sum of the lengths of all counted extents: those that are non-empty
    /// and carry none of the unknown-location, delalloc, or inline flags.
    pub total_bytes: u64,
    /// Bytes covered by counted extents flagged `FIEMAP_EXTENT_SHARED`.
    ///
    /// Always a subset of `total_bytes`.
    pub shared_bytes: u64,
    /// Physical `(start, end_exclusive)` ranges of every shared extent, in
    /// the order the kernel reported them.
    ///
    /// Callers that need to compute a "set shared" total across multiple files
    /// should collect these ranges, sort, and merge overlaps.
    pub shared_extents: Vec<(u64, u64)>,
}
59
60/// Query `FS_IOC_FIEMAP` for every extent of the file referred to by `fd`.
61///
62/// The returned [`FileExtentInfo`] includes total bytes, shared bytes, and the
63/// physical ranges of all shared extents so the caller can compute cross-file
64/// deduplication counts.
65///
66/// `fd` must be open on a regular file.  Symlinks and directories will return
67/// an empty result or an error depending on the kernel version.
68pub fn file_extents(fd: BorrowedFd) -> nix::Result<FileExtentInfo> {
69    use std::os::fd::AsRawFd;
70
71    // We use a Vec<u64> to guarantee 8-byte alignment for the fiemap buffer.
72    let slots = EXTENTS_PER_BATCH as usize;
73    let buf_bytes = 32 + slots * FE_SIZE;
74    let words = (buf_bytes + 7) / 8;
75    let mut buf: Vec<u64> = vec![0u64; words];
76
77    let raw_fd = fd.as_raw_fd();
78    let mut info = FileExtentInfo::default();
79    let mut logical_start: u64 = 0;
80    let mut done = false;
81
82    while !done {
83        buf.fill(0);
84        {
85            let b = as_bytes_mut(&mut buf);
86            write_u64(b, FM_START, logical_start);
87            write_u64(b, FM_LENGTH, u64::MAX.saturating_sub(logical_start));
88            write_u32(b, FM_FLAGS, 0);
89            write_u32(b, FM_COUNT, EXTENTS_PER_BATCH);
90        }
91
92        // SAFETY: buf is aligned and large enough for the fiemap header plus
93        // EXTENTS_PER_BATCH extent slots.  The ioctl only writes within that
94        // region.  raw_fd is a valid open file descriptor for the duration of
95        // this call.
96        let ret = unsafe {
97            libc::ioctl(
98                raw_fd,
99                FS_IOC_FIEMAP,
100                buf.as_mut_ptr() as *mut libc::c_void,
101            )
102        };
103        if ret < 0 {
104            return Err(nix::errno::Errno::last());
105        }
106
107        let b = as_bytes(&buf);
108        let nr = read_u32(b, FM_MAPPED) as usize;
109        if nr == 0 {
110            break;
111        }
112
113        let mut last_logical: u64 = logical_start;
114        let mut last_length: u64 = 0;
115
116        for i in 0..nr {
117            let off = 32 + i * FE_SIZE;
118            let flags = read_u32(b, off + FE_FLAGS);
119            let length = read_u64(b, off + FE_LENGTH);
120            let physical = read_u64(b, off + FE_PHYSICAL);
121
122            last_logical = read_u64(b, off + FE_LOGICAL);
123            last_length = length;
124
125            if flags & FIEMAP_EXTENT_LAST != 0 {
126                done = true;
127            }
128
129            if flags & SKIP_FLAGS != 0 || length == 0 {
130                continue;
131            }
132
133            info.total_bytes += length;
134
135            if flags & FIEMAP_EXTENT_SHARED != 0 {
136                info.shared_bytes += length;
137                info.shared_extents.push((physical, physical + length));
138            }
139        }
140
141        // Advance the logical cursor past the last extent seen.
142        let next = last_logical.saturating_add(last_length);
143        if next <= logical_start {
144            break; // guard against zero-length loops
145        }
146        logical_start = next;
147    }
148
149    Ok(info)
150}
151
/// Views a slice of `u64` words as its underlying bytes in native memory
/// order.  The result is eight times as long as `v` and borrows from it.
fn as_bytes(v: &[u64]) -> &[u8] {
    let byte_len = std::mem::size_of_val(v);
    let start = v.as_ptr() as *const u8;
    // SAFETY: `start` points at `byte_len` initialized bytes belonging to
    // `v`, `u8` has no alignment requirement, and the returned borrow has
    // the same lifetime as `v`.
    unsafe { std::slice::from_raw_parts(start, byte_len) }
}
156
/// Views a mutable slice of `u64` words as its underlying bytes in native
/// memory order.  Writes through the result modify `v` directly.
fn as_bytes_mut(v: &mut [u64]) -> &mut [u8] {
    let byte_len = std::mem::size_of_val(&*v);
    let start = v.as_mut_ptr() as *mut u8;
    // SAFETY: `start` points at `byte_len` initialized bytes belonging to
    // `v`, `u8` has no alignment requirement, and the exclusive borrow of
    // `v` is handed over to the returned slice.
    unsafe { std::slice::from_raw_parts_mut(start, byte_len) }
}
163
/// Reads a native-endian `u64` from the eight bytes of `buf` starting at
/// byte offset `off`.
///
/// # Panics
///
/// Panics if `off..off + 8` is not within `buf`.
fn read_u64(buf: &[u8], off: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&buf[off..off + 8]);
    u64::from_ne_bytes(raw)
}
167
/// Reads a native-endian `u32` from the four bytes of `buf` starting at
/// byte offset `off`.
///
/// # Panics
///
/// Panics if `off..off + 4` is not within `buf`.
fn read_u32(buf: &[u8], off: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&buf[off..off + 4]);
    u32::from_ne_bytes(raw)
}
171
/// Stores `val` as a native-endian `u64` into the eight bytes of `buf`
/// starting at byte offset `off`.
///
/// # Panics
///
/// Panics if `off..off + 8` is not within `buf`.
fn write_u64(buf: &mut [u8], off: usize, val: u64) {
    let encoded = val.to_ne_bytes();
    let dst = &mut buf[off..off + 8];
    dst.copy_from_slice(&encoded);
}
175
/// Stores `val` as a native-endian `u32` into the four bytes of `buf`
/// starting at byte offset `off`.
///
/// # Panics
///
/// Panics if `off..off + 4` is not within `buf`.
fn write_u32(buf: &mut [u8], off: usize, val: u32) {
    let encoded = val.to_ne_bytes();
    let dst = &mut buf[off..off + 4];
    dst.copy_from_slice(&encoded);
}