use std::fs::{File, OpenOptions};
use std::io;
use std::os::unix::io::{AsRawFd, RawFd};
use std::path::Path;
/// Copies `src` to `dst`, trying a reflink (copy-on-write clone) first and
/// falling back to [`sparse_copy`] when the reflink attempt reports that
/// cloning is not supported.
///
/// Returns the length of `src` in bytes, as observed before the copy starts
/// when the reflink succeeds, or as reported by [`sparse_copy`] otherwise.
///
/// # Errors
///
/// Fails if the metadata of `src` cannot be read, if the reflink attempt
/// fails for a reason other than "unsupported", or if the fallback copy fails.
pub fn fast_copy(src: &Path, dst: &Path) -> io::Result<u64> {
    let expected_len = std::fs::metadata(src)?.len();

    if let Err(clone_err) = reflink_copy::reflink(src, dst) {
        // Only "cannot reflink here" style failures are eligible for the
        // fallback; everything else is a genuine error for the caller.
        if !is_reflink_unsupported(&clone_err) {
            return Err(clone_err);
        }
        return sparse_copy(src, dst);
    }

    Ok(expected_len)
}
/// Copies `src` to `dst`, transferring only the data extents of `src` so that
/// holes in the source stay unwritten in the destination.
///
/// The destination is created (or truncated), extended to the source length
/// with `set_len`, and then each region reported by `SEEK_DATA`/`SEEK_HOLE`
/// is copied at the same offset via `copy_extent`. The destination is synced
/// to disk before returning.
///
/// Returns the length of `src` in bytes as read when the copy started.
///
/// # Errors
///
/// Returns `InvalidInput` if `src` and `dst` refer to the same file, and any
/// I/O error raised while opening, seeking, copying, or syncing.
pub fn sparse_copy(src: &Path, dst: &Path) -> io::Result<u64> {
    let src_file = File::open(src)?;
    let src_meta = src_file.metadata()?;
    let len = src_meta.len();

    // Opening `dst` with `truncate(true)` below would wipe the source if both
    // paths name the same inode (same path or a hard link), so refuse up
    // front. A `dst` that cannot be stat'ed (typically: does not exist yet)
    // cannot alias the source.
    if let Ok(dst_meta) = std::fs::metadata(dst) {
        let same_dev = std::os::unix::fs::MetadataExt::dev(&dst_meta)
            == std::os::unix::fs::MetadataExt::dev(&src_meta);
        let same_ino = std::os::unix::fs::MetadataExt::ino(&dst_meta)
            == std::os::unix::fs::MetadataExt::ino(&src_meta);
        if same_dev && same_ino {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "source and destination are the same file",
            ));
        }
    }

    let dst_file = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(true)
        .open(dst)?;
    // Give the destination its final size first; regions never written
    // afterwards are left untouched.
    dst_file.set_len(len)?;

    let src_fd = src_file.as_raw_fd();
    let dst_fd = dst_file.as_raw_fd();
    let mut off: i64 = 0;
    while (off as u64) < len {
        // SAFETY: `lseek` takes only integer arguments and `src_fd` stays open
        // for as long as `src_file` is alive.
        let data_start = unsafe { libc::lseek(src_fd, off, libc::SEEK_DATA) };
        if data_start < 0 {
            let err = io::Error::last_os_error();
            // ENXIO from SEEK_DATA: no data at or after `off`, i.e. the rest
            // of the file is a hole.
            if err.raw_os_error() == Some(libc::ENXIO) {
                break;
            }
            return Err(err);
        }
        // SAFETY: same as above.
        let data_end = unsafe { libc::lseek(src_fd, data_start, libc::SEEK_HOLE) };
        if data_end < 0 {
            return Err(io::Error::last_os_error());
        }
        // Clamp to the length sampled at the start so a source that grows
        // during the copy cannot push writes past the destination size.
        let data_end = (data_end as u64).min(len);
        let data_start = data_start as u64;
        if data_end <= data_start {
            break;
        }
        copy_extent(src_fd, dst_fd, data_start, data_end - data_start)?;
        off = data_end as i64;
    }
    dst_file.sync_all()?;
    Ok(len)
}
/// Reports whether `e` means "reflinking is not possible here", as opposed to
/// a genuine failure, so the caller can fall back to a regular copy.
///
/// An error counts as "unsupported" when its kind is
/// [`io::ErrorKind::Unsupported`] (which also covers errors that carry no raw
/// OS code) or when its raw OS code is one of `ENOTSUP`, `EOPNOTSUPP`,
/// `EXDEV`, `EINVAL`, `ENOSYS`, or `ENOTTY`.
fn is_reflink_unsupported(e: &io::Error) -> bool {
    // Errors built from an `ErrorKind` alone have no OS code; without this
    // check they would be treated as hard failures and never trigger the
    // fallback.
    if e.kind() == io::ErrorKind::Unsupported {
        return true;
    }
    let Some(code) = e.raw_os_error() else {
        return false;
    };
    // ENOTSUP and EOPNOTSUPP may share a value on some targets; a duplicate in
    // a membership list is harmless, so no per-target list is needed.
    // ENOSYS / ENOTTY cover systems where the clone call or ioctl itself is
    // not implemented.
    let aliases: [i32; 6] = [
        libc::ENOTSUP,
        libc::EOPNOTSUPP,
        libc::EXDEV,
        libc::EINVAL,
        libc::ENOSYS,
        libc::ENOTTY,
    ];
    aliases.contains(&code)
}
#[cfg(target_os = "linux")]
/// Copies `len` bytes located at byte offset `off` in `src_fd` to the same
/// offset in `dst_fd`, preferring `copy_file_range` and falling back to
/// [`read_write_extent`] when the kernel call cannot be used.
///
/// The fallback is taken for the not-yet-copied remainder when
/// `copy_file_range` fails with `ENOSYS`, `EXDEV`, `EINVAL`, or `EOPNOTSUPP`,
/// or when it returns 0 while bytes are still outstanding.
///
/// # Errors
///
/// Returns any other `copy_file_range` error, or whatever the fallback
/// reports (including `UnexpectedEof` if the source ends mid-extent).
fn copy_extent(src_fd: RawFd, dst_fd: RawFd, off: u64, len: u64) -> io::Result<()> {
    let mut src_off = off as i64;
    let mut dst_off = off as i64;
    let mut remaining = len;
    while remaining > 0 {
        let chunk = remaining.min(usize::MAX as u64 / 2) as usize;
        // SAFETY: both offset pointers refer to live local `i64`s for the
        // duration of the call; all other arguments are plain integers.
        let n =
            unsafe { libc::copy_file_range(src_fd, &mut src_off, dst_fd, &mut dst_off, chunk, 0) };

        let use_fallback = if n < 0 {
            let err = io::Error::last_os_error();
            let recoverable = matches!(
                err.raw_os_error(),
                Some(libc::ENOSYS)
                    | Some(libc::EXDEV)
                    | Some(libc::EINVAL)
                    | Some(libc::EOPNOTSUPP)
            );
            if !recoverable {
                return Err(err);
            }
            true
        } else {
            // A zero return with bytes still outstanding must not be reported
            // as success: hand the remainder to the read/write path, which
            // either copies it or fails with `UnexpectedEof`.
            n == 0
        };

        if use_fallback {
            let consumed = len - remaining;
            return read_write_extent(src_fd, dst_fd, off + consumed, remaining);
        }
        remaining -= n as u64;
    }
    Ok(())
}
#[cfg(not(target_os = "linux"))]
/// Copies `len` bytes located at byte offset `off` in `src_fd` to the same
/// offset in `dst_fd`.
///
/// This variant is compiled on every target other than Linux and delegates
/// directly to `read_write_extent`; its result is returned unchanged.
fn copy_extent(src_fd: RawFd, dst_fd: RawFd, off: u64, len: u64) -> io::Result<()> {
// Pure delegation: same descriptors, same offset, same length.
read_write_extent(src_fd, dst_fd, off, len)
}
/// Copies `len` bytes located at byte offset `off` in `src_fd` to the same
/// offset in `dst_fd` using positioned reads and writes through a 64 KiB
/// stack buffer.
///
/// Short reads and short writes are handled by looping until the whole extent
/// has been transferred. Calls interrupted by a signal (`EINTR`) are retried
/// rather than aborting the copy.
///
/// # Errors
///
/// Returns `UnexpectedEof` if the source ends before `len` bytes were read,
/// `WriteZero` if `pwrite` reports zero bytes written, and otherwise the OS
/// error from `pread`/`pwrite`.
fn read_write_extent(src_fd: RawFd, dst_fd: RawFd, off: u64, len: u64) -> io::Result<()> {
    const BUF_SIZE: usize = 64 * 1024;
    let mut buf = [0u8; BUF_SIZE];
    let mut copied: u64 = 0;
    while copied < len {
        let to_read = (len - copied).min(BUF_SIZE as u64) as usize;
        let read_off = (off + copied) as i64;
        // SAFETY: `buf` is a live, writable buffer of `BUF_SIZE` bytes and
        // `to_read <= BUF_SIZE`, so the kernel never writes past its end.
        let n = unsafe {
            libc::pread(
                src_fd,
                buf.as_mut_ptr() as *mut libc::c_void,
                to_read,
                read_off,
            )
        };
        if n < 0 {
            let err = io::Error::last_os_error();
            if err.kind() == io::ErrorKind::Interrupted {
                // Nothing was transferred; retry the same read.
                continue;
            }
            return Err(err);
        }
        if n == 0 {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "unexpected EOF mid-extent",
            ));
        }
        let n = n as usize;
        let mut written: usize = 0;
        while written < n {
            let w_off = (off + copied + written as u64) as i64;
            // SAFETY: `buf[written..n]` is an initialized sub-slice of `buf`
            // and exactly `n - written` bytes long.
            let w = unsafe {
                libc::pwrite(
                    dst_fd,
                    buf[written..n].as_ptr() as *const libc::c_void,
                    n - written,
                    w_off,
                )
            };
            if w < 0 {
                let err = io::Error::last_os_error();
                if err.kind() == io::ErrorKind::Interrupted {
                    // Nothing was transferred; retry the same write.
                    continue;
                }
                return Err(err);
            }
            if w == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::WriteZero,
                    "pwrite returned 0",
                ));
            }
            written += w as usize;
        }
        copied += n as u64;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::os::unix::fs::MetadataExt;

    /// One mebibyte, used to express file sizes and offsets below.
    const MIB: u64 = 1024 * 1024;

    /// Creates (or truncates) `path` as a file of `len` bytes and writes a
    /// 64 KiB block of `0xAB` at each offset in `data_offsets`, leaving the
    /// rest of the file unwritten.
    fn make_sparse(path: &Path, len: u64, data_offsets: &[u64]) -> io::Result<()> {
        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .truncate(true)
            .open(path)?;
        file.set_len(len)?;

        // The payload and descriptor are the same for every offset.
        let payload = vec![0xAB_u8; 64 * 1024];
        let fd = file.as_raw_fd();
        for off in data_offsets.iter().copied() {
            let n =
                unsafe { libc::pwrite(fd, payload.as_ptr() as *const _, payload.len(), off as i64) };
            assert!(n > 0, "pwrite failed: {}", io::Error::last_os_error());
        }

        file.sync_all()
    }

    /// A small dense file survives `fast_copy` byte-for-byte.
    #[test]
    fn round_trip_small() {
        let tmp = tempfile::tempdir().unwrap();
        let src = tmp.path().join("src.bin");
        let dst = tmp.path().join("dst.bin");
        std::fs::write(&src, b"hello world").unwrap();

        let copied = fast_copy(&src, &dst).unwrap();

        assert_eq!(copied, 11);
        assert_eq!(std::fs::read(&dst).unwrap(), b"hello world");
    }

    /// `sparse_copy` reproduces the data blocks and does not balloon the
    /// on-disk footprint of a sparse source.
    #[test]
    fn sparse_copy_preserves_holes_and_data() {
        let tmp = tempfile::tempdir().unwrap();
        let src = tmp.path().join("src.bin");
        let dst = tmp.path().join("dst.bin");
        let len: u64 = 16 * MIB;
        let offsets = [0u64, 4 * MIB, 8 * MIB, 12 * MIB];
        make_sparse(&src, len, &offsets).unwrap();

        let copied = sparse_copy(&src, &dst).unwrap();
        assert_eq!(copied, len);

        let dst_meta = std::fs::metadata(&dst).unwrap();
        assert_eq!(dst_meta.len(), len);

        // Every block written into the source must read back intact.
        let mut buf = [0u8; 64 * 1024];
        let dst_file = File::open(&dst).unwrap();
        for off in offsets.iter().copied() {
            let got = unsafe {
                libc::pread(
                    dst_file.as_raw_fd(),
                    buf.as_mut_ptr() as *mut _,
                    buf.len(),
                    off as i64,
                )
            };
            assert_eq!(got as usize, buf.len());
            assert!(buf.iter().all(|byte| *byte == 0xAB));
        }

        // `blocks()` counts 512-byte units.
        let src_bytes_on_disk = std::fs::metadata(&src).unwrap().blocks() * 512;
        let dst_bytes_on_disk = dst_meta.blocks() * 512;
        let source_is_sparse = src_bytes_on_disk < len / 2;
        if source_is_sparse {
            assert!(
                dst_bytes_on_disk < len / 2,
                "source is sparse ({src_bytes_on_disk} bytes on disk) but destination densified to {dst_bytes_on_disk} bytes for an apparent size of {len}",
            );
            assert!(
                dst_bytes_on_disk <= src_bytes_on_disk * 4 + MIB,
                "destination allocated significantly more than source: src={src_bytes_on_disk} dst={dst_bytes_on_disk}",
            );
        } else {
            eprintln!(
                "filesystem did not sparsify the source (src_bytes_on_disk={src_bytes_on_disk}, apparent={len}); sparseness preservation not exercised in this run",
            );
            assert!(
                dst_bytes_on_disk <= src_bytes_on_disk + MIB,
                "destination grew beyond source footprint: src={src_bytes_on_disk} dst={dst_bytes_on_disk}",
            );
        }
    }

    /// `fast_copy` reports, and the destination has, the source's length.
    #[test]
    fn fast_copy_matches_source_size() {
        let tmp = tempfile::tempdir().unwrap();
        let src = tmp.path().join("src.bin");
        let dst = tmp.path().join("dst.bin");
        let len: u64 = 4 * MIB;
        make_sparse(&src, len, &[0, 2 * MIB]).unwrap();

        let copied = fast_copy(&src, &dst).unwrap();

        assert_eq!(copied, len);
        assert_eq!(std::fs::metadata(&dst).unwrap().len(), len);
    }

    /// Copying from a path that does not exist yields `NotFound`.
    #[test]
    fn missing_source_errors() {
        let tmp = tempfile::tempdir().unwrap();
        let missing = tmp.path().join("nope.bin");
        let target = tmp.path().join("dst.bin");

        let err = fast_copy(&missing, &target).unwrap_err();

        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }
}