objects/fs_clone.rs
1// SPDX-License-Identifier: Apache-2.0
2//! Filesystem-level copy-on-write helpers.
3//!
4//! Heddle's worktree materializer needs the storage win of pointing
5//! N worktrees at the same blob bytes (so checking out the same state
6//! to many sibling worktrees costs ~1× disk, not N×) **without** the
7//! mutation hazard that hardlinks bring. With hardlinks, an in-place
8//! write — `chmod +w file && echo new > file`, `O_TRUNC`, etc. —
9//! mutates the shared inode, corrupting every other worktree that
10//! points at the same blob.
11//!
12//! Filesystem reflinks (a.k.a. CoW clones) solve this: the destination
13//! starts out sharing physical blocks with the source, but the first
14//! write to either side automatically forks the underlying allocation.
15//! The OS guarantees isolation even if an agent strips the read-only
16//! bit and overwrites the file in place.
17//!
18//! Platform support:
19//! - **macOS / APFS:** `clonefile(2)` from `<sys/clonefile.h>`. True CoW.
20//! - **Linux / btrfs / XFS-with-reflinks / ZFS:** `ioctl(dest_fd, FICLONE, src_fd)`.
21//! - **Anywhere else** (or when reflink isn't supported by the
22//! underlying filesystem): caller falls back to a real copy.
23//!
24//! The functions here return `Ok(true)` on a successful clone,
25//! `Ok(false)` when the kernel reported the operation isn't supported
26//! on this filesystem (so the caller should fall back to a real copy
27//! and remember to skip future reflink attempts in this batch), and an
28//! `Err` for genuine I/O errors that the caller should surface.
29
30use std::{fs, io, path::Path};
31
32/// Try a filesystem-level reflink (copy-on-write clone) from `source`
33/// to `dest`. On success the destination has its own inode and shares
34/// physical blocks with the source until either side is modified.
35///
36/// On a successful reflink: returns `Ok(true)`. The destination file
37/// has been created with the kernel's choice of permissions (typically
38/// the source's). Callers should `set_permissions` afterwards if they
39/// need a specific mode.
40///
41/// On a "filesystem doesn't support reflinks" verdict (`EXDEV`,
42/// `EOPNOTSUPP`, `ENOTSUP`, `ENOSYS`, `EINVAL` from the ioctl form):
43/// returns `Ok(false)`. The caller should fall back to `fs::copy` and
44/// remember to skip future reflink attempts on this filesystem.
45///
46/// On any other I/O error: returns `Err`.
47///
48/// `dest` must not already exist on macOS (`clonefile` requires a
49/// nonexistent destination). On Linux `FICLONE` requires the dest fd
50/// be opened for writing on a regular file, which we create with
51/// `O_CREAT | O_WRONLY | O_TRUNC`.
52pub fn try_reflink(source: &Path, dest: &Path) -> io::Result<bool> {
53 #[cfg(target_os = "macos")]
54 {
55 try_clonefile_macos(source, dest)
56 }
57 #[cfg(target_os = "linux")]
58 {
59 try_ficlone_linux(source, dest)
60 }
61 #[cfg(not(any(target_os = "macos", target_os = "linux")))]
62 {
63 let _ = (source, dest);
64 Ok(false)
65 }
66}
67
68/// Reflink if possible, otherwise fall back to a real copy. Returns
69/// the same `Ok(true)/Ok(false)` discriminator as [`try_reflink`] —
70/// `true` when the OS gave us a CoW clone, `false` when we paid the
71/// full copy cost. Either way, on `Ok` the destination exists and has
72/// the source's bytes.
73///
74/// The destination's permission bits are not normalized here. Callers
75/// that need a specific mode (`0o644`, `0o755`) should call
76/// `fs::set_permissions` after a successful return.
77pub fn clonefile_or_copy(source: &Path, dest: &Path) -> io::Result<bool> {
78 // `clonefile`/FICLONE require dest not to exist; remove any stale
79 // entry first. Ignored if dest doesn't exist.
80 let _ = fs::remove_file(dest);
81 if try_reflink(source, dest)? {
82 return Ok(true);
83 }
84 fs::copy(source, dest)?;
85 Ok(false)
86}
87
/// macOS backend for [`try_reflink`]: clone `source` to `dest` with
/// `clonefile(2)`.
///
/// Returns `Ok(true)` when the clone was created, `Ok(false)` when the
/// errno is one that `reflink_unsupported` classifies as "reflinks are
/// not available here", and `Err` for every other failure. A path that
/// contains an interior NUL byte is rejected with
/// `io::ErrorKind::InvalidInput` before any syscall is made.
#[cfg(target_os = "macos")]
fn try_clonefile_macos(source: &Path, dest: &Path) -> io::Result<bool> {
    use std::{ffi::CString, os::unix::ffi::OsStrExt};

    // SAFETY: binds the system `clonefile(2)` symbol. The declaration
    // mirrors the prototype in `<sys/clonefile.h>`:
    //     int clonefile(const char *src, const char *dst, uint32_t flags);
    unsafe extern "C" {
        fn clonefile(
            src: *const libc::c_char,
            dst: *const libc::c_char,
            flags: u32,
        ) -> libc::c_int;
    }

    // Convert a path to a NUL-terminated C string, mapping the only
    // possible failure (an embedded NUL) to an `InvalidInput` error.
    let to_c_path = |path: &Path, nul_message: &'static str| {
        CString::new(path.as_os_str().as_bytes())
            .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, nul_message))
    };
    let c_source = to_c_path(source, "source path contains interior NUL")?;
    let c_dest = to_c_path(dest, "destination path contains interior NUL")?;

    // SAFETY: both pointers come from `CString`s that outlive the call,
    // so they are valid NUL-terminated strings. `flags = 0` sets no
    // option bits, i.e. the default behavior.
    // NOTE(review): per clonefile(2) a symlink `source` is followed
    // unless `CLONE_NOFOLLOW` is passed — confirm that is intended.
    let status = unsafe { clonefile(c_source.as_ptr(), c_dest.as_ptr(), 0) };
    if status == 0 {
        return Ok(true);
    }

    // Read errno straight away, before anything else can overwrite it.
    match io::Error::last_os_error() {
        unsupported if reflink_unsupported(&unsupported) => Ok(false),
        fatal => Err(fatal),
    }
}
128
129#[cfg(target_os = "linux")]
130fn try_ficlone_linux(source: &Path, dest: &Path) -> io::Result<bool> {
131 use std::{fs::OpenOptions, os::unix::io::AsRawFd};
132
133 // FICLONE = _IOW(0x94, 9, int) on Linux. The kernel header
134 // `<linux/fs.h>` (and `<linux/fs.h>` UAPI) define this as
135 // 0x40049409 = (1 << 30) | (4 << 16) | (0x94 << 8) | 9
136 // i.e. _IOC_WRITE | sizeof(int) | type=0x94 | nr=9.
137 const FICLONE: libc::c_ulong = 0x4004_9409;
138
139 let src = OpenOptions::new().read(true).open(source)?;
140 let dst = OpenOptions::new()
141 .write(true)
142 .create(true)
143 .truncate(true)
144 .open(dest)?;
145
146 // SAFETY: ioctl with two valid fds; FICLONE expects an `int` fd
147 // as the third arg.
148 let rc = unsafe { libc::ioctl(dst.as_raw_fd(), FICLONE, src.as_raw_fd()) };
149 if rc == 0 {
150 return Ok(true);
151 }
152
153 let err = io::Error::last_os_error();
154 // Clean up the empty dest we just created so the caller's
155 // `fs::copy` fallback starts from a known state.
156 drop(dst);
157 let _ = fs::remove_file(dest);
158 if reflink_unsupported(&err) {
159 Ok(false)
160 } else {
161 Err(err)
162 }
163}
164
165/// Decide whether a clonefile/FICLONE error means "this filesystem
166/// (or this src/dst pair) won't ever reflink" vs a transient or
167/// caller-bug failure that we should surface.
168#[cfg(any(target_os = "macos", target_os = "linux"))]
169fn reflink_unsupported(err: &io::Error) -> bool {
170 let Some(code) = err.raw_os_error() else {
171 return false;
172 };
173 // EXDEV: cross-device — the two paths live on different filesystems.
174 // EOPNOTSUPP / ENOTSUP: filesystem doesn't implement reflinks
175 // (e.g. ext4 on Linux, HFS+ on macOS). On Linux these two are
176 // aliases (both = 95) so listing both makes one branch
177 // unreachable; on macOS they're distinct (102 vs 45), so we need
178 // both to be matched. `#[allow(unreachable_patterns)]` keeps the
179 // portable spelling without a `cfg`-split.
180 // ENOSYS: kernel too old to know the syscall.
181 // EINVAL: FICLONE returns this when the src/dst aren't on the same
182 // filesystem on some kernels, or when the filesystem is mounted
183 // without reflink support.
184 #[allow(unreachable_patterns)]
185 let is_unsupported = matches!(
186 code,
187 libc::EXDEV | libc::EOPNOTSUPP | libc::ENOTSUP | libc::ENOSYS | libc::EINVAL
188 );
189 is_unsupported
190}
191
192/// Test whether the filesystem at `parent_dir` supports reflinks by
193/// trying one against a temp source/dest pair. Returns `true` on
194/// success. Useful for tests that want to soft-skip on filesystems
195/// without CoW support, and for any caller that wants a runtime
196/// capability check before asserting on reflink-specific properties.
197pub fn filesystem_supports_reflink(parent_dir: &Path) -> bool {
198 use std::io::Write;
199
200 let src = parent_dir.join(".heddle-reflink-probe-src");
201 let dst = parent_dir.join(".heddle-reflink-probe-dst");
202 let _ = fs::remove_file(&src);
203 let _ = fs::remove_file(&dst);
204
205 let mut f = match fs::File::create(&src) {
206 Ok(f) => f,
207 Err(_) => return false,
208 };
209 if f.write_all(b"reflink-probe").is_err() {
210 let _ = fs::remove_file(&src);
211 return false;
212 }
213 drop(f);
214
215 let supported = matches!(try_reflink(&src, &dst), Ok(true));
216 let _ = fs::remove_file(&src);
217 let _ = fs::remove_file(&dst);
218 supported
219}
220
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use tempfile::TempDir;

    use super::*;

    /// Shared fixture: a fresh temporary directory together with the
    /// `src.txt` / `dst.txt` paths inside it. Neither file is created.
    /// The returned `TempDir` must stay bound for the duration of the
    /// test, otherwise the directory is removed when it drops.
    fn scratch() -> (TempDir, PathBuf, PathBuf) {
        let temp = TempDir::new().unwrap();
        let src = temp.path().join("src.txt");
        let dst = temp.path().join("dst.txt");
        (temp, src, dst)
    }

    /// A missing destination is created and ends up with the source's
    /// bytes, whichever of clone / copy was used.
    #[test]
    fn clonefile_or_copy_creates_destination_with_source_bytes() {
        let (_temp, src, dst) = scratch();
        fs::write(&src, b"hello reflink").unwrap();

        clonefile_or_copy(&src, &dst).unwrap();

        assert_eq!(fs::read(&dst).unwrap(), b"hello reflink");
    }

    /// A pre-existing destination is replaced rather than preserved.
    #[test]
    fn clonefile_or_copy_overwrites_existing_destination() {
        let (_temp, src, dst) = scratch();
        fs::write(&src, b"new content").unwrap();
        fs::write(&dst, b"old content").unwrap();

        clonefile_or_copy(&src, &dst).unwrap();

        assert_eq!(fs::read(&dst).unwrap(), b"new content");
    }

    /// Core isolation property: writing to the destination must leave
    /// the source's bytes alone. A real CoW clone forks blocks on the
    /// first write; the `fs::copy` fallback produces an independent
    /// file from the start. The assertion holds on both paths.
    #[test]
    fn writing_to_destination_does_not_mutate_source() {
        let (_temp, src, dst) = scratch();
        fs::write(&src, b"original source").unwrap();

        clonefile_or_copy(&src, &dst).unwrap();
        fs::write(&dst, b"mutated dest").unwrap();

        assert_eq!(fs::read(&src).unwrap(), b"original source");
        assert_eq!(fs::read(&dst).unwrap(), b"mutated dest");
    }

    /// Unlike a hardlink, a reflinked destination has its own inode.
    /// That is the correctness distinction on a CoW filesystem: agents
    /// can chmod or write in place without reaching across worktrees.
    /// Soft-skips when the temp directory's filesystem cannot reflink.
    #[cfg(unix)]
    #[test]
    fn successful_reflink_yields_distinct_inode() {
        use std::os::unix::fs::MetadataExt;

        let (temp, src, dst) = scratch();
        if !filesystem_supports_reflink(temp.path()) {
            eprintln!(
                "[skip] filesystem at {:?} does not support reflinks; cannot assert inode property",
                temp.path()
            );
            return;
        }

        fs::write(&src, b"reflink inode test").unwrap();

        let did_reflink = try_reflink(&src, &dst).unwrap();
        assert!(did_reflink, "filesystem advertised reflink support");

        let inode_of = |path: &Path| fs::metadata(path).unwrap().ino();
        let src_inode = inode_of(&src);
        let dst_inode = inode_of(&dst);
        assert_ne!(
            src_inode, dst_inode,
            "reflinked files must have distinct inodes (got {} for both)",
            src_inode
        );
    }
}