Skip to main content

btrfs_uapi/
fiemap.rs

1//! # Physical extent layout of files via `FS_IOC_FIEMAP`
2//!
3//! `FS_IOC_FIEMAP` is a standard VFS ioctl (not btrfs-specific) that reports
4//! the physical extents backing a file.  It is the mechanism `btrfs filesystem
5//! du` uses to determine how much disk space each file occupies and which
6//! extents are shared with other files.
7
8use nix::libc;
9use std::os::unix::io::BorrowedFd;
10
11// FS_IOC_FIEMAP = _IOWR('f', 11, struct fiemap)
12// struct fiemap (without flexible array member) = 32 bytes
13// On 64-bit Linux: (3 << 30) | (32 << 16) | (0x66 << 8) | 11 = 0xC020_660B
14const FS_IOC_FIEMAP: libc::Ioctl = 0xC020_660Bu32 as libc::Ioctl;
15
// Byte offsets of the fiemap header fields (all stored in native byte order).
// Each offset is derived from the previous field's offset plus its width.
const FM_START: usize = 0; // u64: logical offset at which mapping starts
const FM_LENGTH: usize = FM_START + 8; // u64: logical length to map
const FM_FLAGS: usize = FM_LENGTH + 8; // u32: request flags
const FM_MAPPED: usize = FM_FLAGS + 4; // u32: out, number of extents returned
const FM_COUNT: usize = FM_MAPPED + 4; // u32: in, number of extent slots

// Byte offsets of the fiemap_extent fields, relative to the start of a slot.
const FE_LOGICAL: usize = 0; // u64
const FE_PHYSICAL: usize = FE_LOGICAL + 8; // u64
const FE_LENGTH: usize = FE_PHYSICAL + 8; // u64
// fe_reserved64[2] occupies bytes 24..40, between fe_length and fe_flags.
const FE_FLAGS: usize = FE_LENGTH + 8 + 2 * 8; // u32

// Size in bytes of one fiemap_extent slot.
const FE_SIZE: usize = 56;
31
// Bits of a fiemap extent's flags word that this module inspects.
const FIEMAP_EXTENT_LAST: u32 = 1 << 0;
const FIEMAP_EXTENT_UNKNOWN: u32 = 1 << 1;
const FIEMAP_EXTENT_DELALLOC: u32 = 1 << 2;
const FIEMAP_EXTENT_DATA_INLINE: u32 = 1 << 9;
const FIEMAP_EXTENT_SHARED: u32 = 1 << 13;

/// Extents carrying any of these flags are left out of the byte totals:
/// their location is unknown, they have not been written yet, or their data
/// is stored inline in metadata.
const SKIP_FLAGS: u32 = FIEMAP_EXTENT_DATA_INLINE
    | FIEMAP_EXTENT_DELALLOC
    | FIEMAP_EXTENT_UNKNOWN;

/// How many extent slots each ioctl call asks the kernel to fill.
const EXTENTS_PER_BATCH: u32 = 1 << 8;
45
/// Aggregate physical extent usage for one file.
#[derive(Clone, Debug, Default)]
pub struct FileExtentInfo {
    /// Total length, in bytes, of every counted extent.  Extents flagged as
    /// unknown, delalloc, or inline are not counted.
    pub total_bytes: u64,
    /// Portion of the counted bytes that lies in extents flagged
    /// `FIEMAP_EXTENT_SHARED`.
    pub shared_bytes: u64,
    /// One physical `(start, end_exclusive)` range per shared extent.
    ///
    /// To compute a "set shared" total across several files, gather these
    /// ranges from each file, sort them, and merge the overlapping ones.
    pub shared_extents: Vec<(u64, u64)>,
}
59
60/// Query `FS_IOC_FIEMAP` for every extent of the file referred to by `fd`.
61///
62/// The returned [`FileExtentInfo`] includes total bytes, shared bytes, and the
63/// physical ranges of all shared extents so the caller can compute cross-file
64/// deduplication counts.
65///
66/// `fd` must be open on a regular file.  Symlinks and directories will return
67/// an empty result or an error depending on the kernel version.
68///
69/// # Errors
70///
71/// Returns `Err` if the `FS_IOC_FIEMAP` ioctl fails.
72pub fn file_extents(fd: BorrowedFd) -> nix::Result<FileExtentInfo> {
73    use std::os::fd::AsRawFd;
74
75    // We use a Vec<u64> to guarantee 8-byte alignment for the fiemap buffer.
76    let slots = EXTENTS_PER_BATCH as usize;
77    let buf_bytes = 32 + slots * FE_SIZE;
78    let words = buf_bytes.div_ceil(8);
79    let mut buf: Vec<u64> = vec![0u64; words];
80
81    let raw_fd = fd.as_raw_fd();
82    let mut info = FileExtentInfo::default();
83    let mut logical_start: u64 = 0;
84    let mut done = false;
85
86    while !done {
87        buf.fill(0);
88        {
89            let b = as_bytes_mut(&mut buf);
90            write_u64(b, FM_START, logical_start);
91            write_u64(b, FM_LENGTH, u64::MAX.saturating_sub(logical_start));
92            write_u32(b, FM_FLAGS, 0);
93            write_u32(b, FM_COUNT, EXTENTS_PER_BATCH);
94        }
95
96        // SAFETY: buf is aligned and large enough for the fiemap header plus
97        // EXTENTS_PER_BATCH extent slots.  The ioctl only writes within that
98        // region.  raw_fd is a valid open file descriptor for the duration of
99        // this call.
100        let ret = unsafe {
101            libc::ioctl(
102                raw_fd,
103                FS_IOC_FIEMAP,
104                buf.as_mut_ptr().cast::<libc::c_void>(),
105            )
106        };
107        if ret < 0 {
108            return Err(nix::errno::Errno::last());
109        }
110
111        let b = as_bytes(&buf);
112        let nr = read_u32(b, FM_MAPPED) as usize;
113        if nr == 0 {
114            break;
115        }
116
117        let mut last_logical: u64 = logical_start;
118        let mut last_length: u64 = 0;
119
120        for i in 0..nr {
121            let off = 32 + i * FE_SIZE;
122            let flags = read_u32(b, off + FE_FLAGS);
123            let length = read_u64(b, off + FE_LENGTH);
124            let physical = read_u64(b, off + FE_PHYSICAL);
125
126            last_logical = read_u64(b, off + FE_LOGICAL);
127            last_length = length;
128
129            if flags & FIEMAP_EXTENT_LAST != 0 {
130                done = true;
131            }
132
133            if flags & SKIP_FLAGS != 0 || length == 0 {
134                continue;
135            }
136
137            info.total_bytes += length;
138
139            if flags & FIEMAP_EXTENT_SHARED != 0 {
140                info.shared_bytes += length;
141                info.shared_extents.push((physical, physical + length));
142            }
143        }
144
145        // Advance the logical cursor past the last extent seen.
146        let next = last_logical.saturating_add(last_length);
147        if next <= logical_start {
148            break; // guard against zero-length loops
149        }
150        logical_start = next;
151    }
152
153    Ok(info)
154}
155
/// Reinterpret a slice of `u64` words as the raw bytes backing it.
fn as_bytes(v: &[u64]) -> &[u8] {
    let byte_len = std::mem::size_of_val(v);
    let start = v.as_ptr().cast::<u8>();
    // SAFETY: `start` points at `byte_len` initialized bytes owned by `v`,
    // u8 has no alignment requirement, and the returned borrow shares `v`'s
    // lifetime.
    unsafe { std::slice::from_raw_parts(start, byte_len) }
}
160
/// Reinterpret a mutable slice of `u64` words as the raw bytes backing it.
fn as_bytes_mut(v: &mut [u64]) -> &mut [u8] {
    let byte_len = std::mem::size_of_val(v);
    let start = v.as_mut_ptr().cast::<u8>();
    // SAFETY: `start` points at `byte_len` initialized bytes owned by `v`,
    // u8 has no alignment requirement, and the exclusive borrow of `v` is
    // held for as long as the returned slice lives.
    unsafe { std::slice::from_raw_parts_mut(start, byte_len) }
}
167
/// Read a native-endian `u64` from the 8 bytes of `buf` starting at `off`.
///
/// Panics if `buf` does not contain 8 bytes at that offset.
fn read_u64(buf: &[u8], off: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&buf[off..off + 8]);
    u64::from_ne_bytes(raw)
}
171
/// Read a native-endian `u32` from the 4 bytes of `buf` starting at `off`.
///
/// Panics if `buf` does not contain 4 bytes at that offset.
fn read_u32(buf: &[u8], off: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&buf[off..off + 4]);
    u32::from_ne_bytes(raw)
}
175
/// Store `val` in native byte order into the 8 bytes of `buf` starting at
/// `off`.
///
/// Panics if `buf` does not contain 8 bytes at that offset.
fn write_u64(buf: &mut [u8], off: usize, val: u64) {
    let target = &mut buf[off..off + 8];
    for (dst, src) in target.iter_mut().zip(val.to_ne_bytes()) {
        *dst = src;
    }
}
179
/// Store `val` in native byte order into the 4 bytes of `buf` starting at
/// `off`.
///
/// Panics if `buf` does not contain 4 bytes at that offset.
fn write_u32(buf: &mut [u8], off: usize, val: u32) {
    let target = &mut buf[off..off + 4];
    for (dst, src) in target.iter_mut().zip(val.to_ne_bytes()) {
        *dst = src;
    }
}