Skip to main content

btrfs_uapi/
dedupe.rs

//! # Extent deduplication: comparing and deduplicating file extents
//!
//! Wraps `BTRFS_IOC_FILE_EXTENT_SAME` to request that the kernel compare a
//! byte range in a source file against ranges in one or more destination files.
//! Where the data is identical, the destination extents are replaced with
//! references to the source extent, saving space.
7
8use crate::raw::{
9    BTRFS_SAME_DATA_DIFFERS, btrfs_ioc_file_extent_same, btrfs_ioctl_same_args,
10    btrfs_ioctl_same_extent_info,
11};
12use std::{
13    mem,
14    os::{fd::AsRawFd, unix::io::BorrowedFd},
15};
16
/// One destination range for a dedupe request: which file, and where in it.
///
/// [`file_extent_same`] takes a slice of these and hands each entry's raw
/// descriptor number and offset to the kernel.
#[derive(Clone, Debug)]
pub struct DedupeTarget {
    /// Borrowed descriptor of the destination file; only its raw fd number is
    /// forwarded to the ioctl.
    ///
    /// NOTE(review): the borrow is typed `'static`, so the compiler does not
    /// tie it to a shorter-lived owner — callers presumably must keep the
    /// descriptor open for as long as this target is in use.
    pub fd: BorrowedFd<'static>,
    /// Byte offset within the destination file at which the comparison starts.
    pub logical_offset: u64,
}
25
/// Outcome reported for one destination of a dedupe request.
///
/// [`file_extent_same`] produces one value per target by inspecting the
/// per-destination `status` field the ioctl fills in.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DedupeResult {
    /// Status was `0`; carries the `bytes_deduped` count reported for this
    /// destination.
    Deduped(u64),
    /// Status equalled `BTRFS_SAME_DATA_DIFFERS`: source and destination
    /// contents did not match.
    DataDiffers,
    /// Any other status value, passed through unchanged as reported for this
    /// destination.
    Error(i32),
}
36
37/// Deduplicate a source range against one or more destination ranges.
38///
39/// Compares `length` bytes starting at `src_offset` in the file referred to
40/// by `src_fd` against each target. Where data matches, the destination
41/// extents are replaced with shared references to the source extent.
42///
43/// Returns one [`DedupeResult`] per target, in the same order.
44///
45/// # Errors
46///
47/// Returns `Err` if the ioctl fails. `EINVAL` if offsets or length are not
48/// sector-aligned, or if `targets` is empty. `EPERM` if destination files
49/// are not writable.
50#[allow(clippy::cast_possible_wrap)] // BTRFS_SAME_DATA_DIFFERS is 1, fits in i32
51pub fn file_extent_same(
52    src_fd: BorrowedFd<'_>,
53    src_offset: u64,
54    length: u64,
55    targets: &[DedupeTarget],
56) -> nix::Result<Vec<DedupeResult>> {
57    let count = targets.len();
58
59    // Flexible array member pattern: allocate header + count info entries.
60    let base_size = mem::size_of::<btrfs_ioctl_same_args>();
61    let info_size = mem::size_of::<btrfs_ioctl_same_extent_info>();
62    let total_bytes = base_size + count * info_size;
63    let num_u64s = total_bytes.div_ceil(mem::size_of::<u64>());
64    let mut buf = vec![0u64; num_u64s];
65
66    // SAFETY: buf is correctly sized and aligned for btrfs_ioctl_same_args.
67    // We populate the header and info entries before calling the ioctl, and
68    // read the results only after a successful return.
69    unsafe {
70        let args_ptr = buf.as_mut_ptr().cast::<btrfs_ioctl_same_args>();
71        (*args_ptr).logical_offset = src_offset;
72        (*args_ptr).length = length;
73        #[allow(clippy::cast_possible_truncation)]
74        // count is bounded by target slice length
75        {
76            (*args_ptr).dest_count = count as u16;
77        }
78
79        let info_slice = (*args_ptr).info.as_mut_slice(count);
80        for (i, target) in targets.iter().enumerate() {
81            info_slice[i].fd = i64::from(target.fd.as_raw_fd());
82            info_slice[i].logical_offset = target.logical_offset;
83        }
84
85        btrfs_ioc_file_extent_same(src_fd.as_raw_fd(), &raw mut *args_ptr)?;
86
87        let info_slice = (*args_ptr).info.as_slice(count);
88        Ok(info_slice
89            .iter()
90            .map(|info| {
91                if info.status == 0 {
92                    DedupeResult::Deduped(info.bytes_deduped)
93                } else if info.status == BTRFS_SAME_DATA_DIFFERS as i32 {
94                    DedupeResult::DataDiffers
95                } else {
96                    DedupeResult::Error(info.status)
97                }
98            })
99            .collect())
100    }
101}
102
#[cfg(test)]
mod tests {
    use super::*;

    /// `Debug` output of the success variant includes the byte count.
    #[test]
    fn dedupe_result_deduped_debug() {
        assert_eq!(format!("{:?}", DedupeResult::Deduped(4096)), "Deduped(4096)");
    }

    /// `Debug` output of the unit variant is just its name.
    #[test]
    fn dedupe_result_data_differs_debug() {
        assert_eq!(format!("{:?}", DedupeResult::DataDiffers), "DataDiffers");
    }

    /// `Debug` output of the error variant includes the signed status.
    #[test]
    fn dedupe_result_error_debug() {
        assert_eq!(format!("{:?}", DedupeResult::Error(-22)), "Error(-22)");
    }

    /// Equality compares both the variant and its payload.
    #[test]
    fn dedupe_result_equality() {
        // Same variant, same payload.
        assert_eq!(DedupeResult::Deduped(100), DedupeResult::Deduped(100));
        assert_eq!(DedupeResult::DataDiffers, DedupeResult::DataDiffers);
        assert_eq!(DedupeResult::Error(-1), DedupeResult::Error(-1));

        // Same variant with a different payload, or a different variant.
        assert_ne!(DedupeResult::Deduped(100), DedupeResult::Deduped(200));
        assert_ne!(DedupeResult::DataDiffers, DedupeResult::Deduped(0));
        assert_ne!(DedupeResult::Error(-1), DedupeResult::Error(-2));
    }

    /// Rounding the header + entries size up to whole `u64` words never
    /// yields fewer bytes than requested.
    #[test]
    fn allocation_sizing() {
        let word = mem::size_of::<u64>();
        let header = mem::size_of::<btrfs_ioctl_same_args>();
        let entry = mem::size_of::<btrfs_ioctl_same_extent_info>();

        for count in [0, 1, 2, 5, 16, 255] {
            let total_bytes = header + count * entry;
            let allocated = total_bytes.div_ceil(word) * word;
            assert!(
                allocated >= total_bytes,
                "count={count}: allocated {allocated} < needed {total_bytes}"
            );
        }
    }

    /// Sanity check: the constant should be 1 per the kernel header.
    #[test]
    fn btrfs_same_data_differs_value() {
        assert_eq!(BTRFS_SAME_DATA_DIFFERS, 1);
    }
}