Skip to main content

btrfs_uapi/
fiemap.rs

1//! # Physical extent layout of files via `FS_IOC_FIEMAP`
2//!
3//! `FS_IOC_FIEMAP` is a standard VFS ioctl (not btrfs-specific) that reports
4//! the physical extents backing a file.  It is the mechanism `btrfs filesystem
5//! du` uses to determine how much disk space each file occupies and which
6//! extents are shared with other files.
7
8use nix::libc;
9use std::os::unix::io::BorrowedFd;
10
11// FS_IOC_FIEMAP = _IOWR('f', 11, struct fiemap)
12// struct fiemap (without flexible array member) = 32 bytes
13// On 64-bit Linux: (3 << 30) | (32 << 16) | (0x66 << 8) | 11 = 0xC020_660B
14const FS_IOC_FIEMAP: libc::Ioctl = 0xC020_660Bu32 as libc::Ioctl;
15
// Byte offsets of the fields in the fiemap request header (all native byte
// order).  Each offset is the previous field's offset plus its size.
const FM_START: usize = 0; // u64, in:  logical offset to start mapping from
const FM_LENGTH: usize = FM_START + 8; // u64, in:  logical length to map
const FM_FLAGS: usize = FM_LENGTH + 8; // u32, in:  request flags
const FM_MAPPED: usize = FM_FLAGS + 4; // u32, out: number of extents returned
const FM_COUNT: usize = FM_MAPPED + 4; // u32, in:  number of extent slots

// Byte offsets of the fields inside a single fiemap_extent slot.
const FE_LOGICAL: usize = 0; // u64
const FE_PHYSICAL: usize = FE_LOGICAL + 8; // u64
const FE_LENGTH: usize = FE_PHYSICAL + 8; // u64
// Skip the length field itself plus fe_reserved64[2] (bytes 24..40).
const FE_FLAGS: usize = FE_LENGTH + 8 + 2 * 8; // u32

// Total size of one extent slot: the flags word is followed by 12 further
// bytes that this module never reads.
const FE_SIZE: usize = FE_FLAGS + 4 + 12;
31
// Per-extent flag bits reported in the extent's flags word.
/// Marks the final extent of the file.
const FIEMAP_EXTENT_LAST: u32 = 1 << 0;
/// The extent's location is unknown.
const FIEMAP_EXTENT_UNKNOWN: u32 = 1 << 1;
/// The extent has not been written out yet.
const FIEMAP_EXTENT_DELALLOC: u32 = 1 << 2;
/// The extent's data is stored inline in metadata.
const FIEMAP_EXTENT_DATA_INLINE: u32 = 1 << 9;
/// The extent is shared with other files.
const FIEMAP_EXTENT_SHARED: u32 = 1 << 13;

/// Any extent carrying one of these flags is left out of the byte totals:
/// its location is unknown, it is not yet written, or it lives inline in
/// metadata.
const SKIP_FLAGS: u32 = FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_DATA_INLINE;

/// How many extent slots each ioctl call asks the kernel to fill.
const EXTENTS_PER_BATCH: u32 = 256;
44
/// Summary of the physical extent usage of a single file.
///
/// Implements `PartialEq`/`Eq` so results can be compared directly, e.g. in
/// tests or when checking whether two files report identical usage.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FileExtentInfo {
    /// Sum of the lengths of all counted extents, i.e. every extent that is
    /// not flagged as unknown-location, delalloc (not yet written), or
    /// inline.
    pub total_bytes: u64,
    /// Bytes covered by counted extents flagged `FIEMAP_EXTENT_SHARED`.
    pub shared_bytes: u64,
    /// Physical `(start, end_exclusive)` ranges of every shared extent.
    ///
    /// Callers that need to compute a "set shared" total across multiple files
    /// should collect these ranges, sort, and merge overlaps.
    pub shared_extents: Vec<(u64, u64)>,
}
58
59/// Query `FS_IOC_FIEMAP` for every extent of the file referred to by `fd`.
60///
61/// The returned [`FileExtentInfo`] includes total bytes, shared bytes, and the
62/// physical ranges of all shared extents so the caller can compute cross-file
63/// deduplication counts.
64///
65/// `fd` must be open on a regular file.  Symlinks and directories will return
66/// an empty result or an error depending on the kernel version.
67pub fn file_extents(fd: BorrowedFd) -> nix::Result<FileExtentInfo> {
68    use std::os::fd::AsRawFd;
69
70    // We use a Vec<u64> to guarantee 8-byte alignment for the fiemap buffer.
71    let slots = EXTENTS_PER_BATCH as usize;
72    let buf_bytes = 32 + slots * FE_SIZE;
73    let words = (buf_bytes + 7) / 8;
74    let mut buf: Vec<u64> = vec![0u64; words];
75
76    let raw_fd = fd.as_raw_fd();
77    let mut info = FileExtentInfo::default();
78    let mut logical_start: u64 = 0;
79    let mut done = false;
80
81    while !done {
82        buf.fill(0);
83        {
84            let b = as_bytes_mut(&mut buf);
85            write_u64(b, FM_START, logical_start);
86            write_u64(b, FM_LENGTH, u64::MAX.saturating_sub(logical_start));
87            write_u32(b, FM_FLAGS, 0);
88            write_u32(b, FM_COUNT, EXTENTS_PER_BATCH);
89        }
90
91        // SAFETY: buf is aligned and large enough for the fiemap header plus
92        // EXTENTS_PER_BATCH extent slots.  The ioctl only writes within that
93        // region.  raw_fd is a valid open file descriptor for the duration of
94        // this call.
95        let ret =
96            unsafe { libc::ioctl(raw_fd, FS_IOC_FIEMAP, buf.as_mut_ptr() as *mut libc::c_void) };
97        if ret < 0 {
98            return Err(nix::errno::Errno::last());
99        }
100
101        let b = as_bytes(&buf);
102        let nr = read_u32(b, FM_MAPPED) as usize;
103        if nr == 0 {
104            break;
105        }
106
107        let mut last_logical: u64 = logical_start;
108        let mut last_length: u64 = 0;
109
110        for i in 0..nr {
111            let off = 32 + i * FE_SIZE;
112            let flags = read_u32(b, off + FE_FLAGS);
113            let length = read_u64(b, off + FE_LENGTH);
114            let physical = read_u64(b, off + FE_PHYSICAL);
115
116            last_logical = read_u64(b, off + FE_LOGICAL);
117            last_length = length;
118
119            if flags & FIEMAP_EXTENT_LAST != 0 {
120                done = true;
121            }
122
123            if flags & SKIP_FLAGS != 0 || length == 0 {
124                continue;
125            }
126
127            info.total_bytes += length;
128
129            if flags & FIEMAP_EXTENT_SHARED != 0 {
130                info.shared_bytes += length;
131                info.shared_extents.push((physical, physical + length));
132            }
133        }
134
135        // Advance the logical cursor past the last extent seen.
136        let next = last_logical.saturating_add(last_length);
137        if next <= logical_start {
138            break; // guard against zero-length loops
139        }
140        logical_start = next;
141    }
142
143    Ok(info)
144}
145
/// Views a slice of `u64` words as the bytes that make it up, without
/// copying.  The result is `8 * v.len()` bytes long, in native byte order.
fn as_bytes(v: &[u64]) -> &[u8] {
    let byte_len = std::mem::size_of_val(v);
    let start = v.as_ptr().cast::<u8>();
    // SAFETY: `start` points at `byte_len` initialized bytes owned by `v`,
    // which stays borrowed for the lifetime of the returned slice; `u8` has
    // alignment 1 and every bit pattern is a valid `u8`.
    unsafe { std::slice::from_raw_parts(start, byte_len) }
}
150
/// Mutable counterpart of the byte view: exposes a slice of `u64` words as
/// `8 * v.len()` writable bytes, so writes land directly in `v`.
fn as_bytes_mut(v: &mut [u64]) -> &mut [u8] {
    let byte_len = std::mem::size_of_val(&*v);
    let start = v.as_mut_ptr().cast::<u8>();
    // SAFETY: `start` points at `byte_len` initialized bytes owned by `v`,
    // which is exclusively borrowed for the lifetime of the returned slice;
    // `u8` has alignment 1 and any byte pattern is a valid `u64` as well.
    unsafe { std::slice::from_raw_parts_mut(start, byte_len) }
}
155
/// Reads the native-endian `u64` stored at `buf[off..off + 8]`.
///
/// Panics if that range does not lie within `buf`.
fn read_u64(buf: &[u8], off: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&buf[off..off + 8]);
    u64::from_ne_bytes(raw)
}
159
/// Reads the native-endian `u32` stored at `buf[off..off + 4]`.
///
/// Panics if that range does not lie within `buf`.
fn read_u32(buf: &[u8], off: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&buf[off..off + 4]);
    u32::from_ne_bytes(raw)
}
163
/// Stores `val` in native byte order at `buf[off..off + 8]`.
///
/// Panics if that range does not lie within `buf`.
fn write_u64(buf: &mut [u8], off: usize, val: u64) {
    let encoded = val.to_ne_bytes();
    let dest = &mut buf[off..off + 8];
    dest.copy_from_slice(&encoded);
}
167
/// Stores `val` in native byte order at `buf[off..off + 4]`.
///
/// Panics if that range does not lie within `buf`.
fn write_u32(buf: &mut [u8], off: usize, val: u32) {
    let encoded = val.to_ne_bytes();
    let dest = &mut buf[off..off + 4];
    dest.copy_from_slice(&encoded);
}