Skip to main content

coreutils_rs/cp/
core.rs

1use std::io;
2use std::path::Path;
3
4#[cfg(unix)]
5use std::os::unix::fs::MetadataExt;
6#[cfg(unix)]
7use std::os::unix::fs::PermissionsExt;
8
/// Policy for following symbolic links found among the sources.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DerefMode {
    /// Copy the link itself; never follow it.
    Never,
    /// Follow links named on the command line, but not links met while recursing.
    CommandLine,
    /// Follow every symbolic link.
    Always,
}
19
/// How existing destinations are backed up (GNU `--backup` controls).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BackupMode {
    /// Always make numbered backups (`.~1~`, `.~2~`, ...).
    Numbered,
    /// Use numbered backups when one already exists, simple backups otherwise.
    Existing,
    /// Make a single backup by appending the configured suffix.
    Simple,
    /// Do not make backups.
    None,
}
32
/// When to attempt a reflink (copy-on-write clone) instead of copying data.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ReflinkMode {
    /// Attempt a reflink and fall back to an ordinary copy if it fails.
    Auto,
    /// Insist on a reflink; the copy fails when cloning is not possible.
    Always,
    /// Skip the reflink attempt entirely.
    Never,
}
43
44/// Configuration for a cp invocation.
45pub struct CpConfig {
46    pub recursive: bool,
47    pub force: bool,
48    pub interactive: bool,
49    pub no_clobber: bool,
50    pub verbose: bool,
51    pub preserve_mode: bool,
52    pub preserve_ownership: bool,
53    pub preserve_timestamps: bool,
54    pub dereference: DerefMode,
55    pub link: bool,
56    pub symbolic_link: bool,
57    pub update: bool,
58    pub one_file_system: bool,
59    pub backup: Option<BackupMode>,
60    pub suffix: String,
61    pub reflink: ReflinkMode,
62    pub target_directory: Option<String>,
63    pub no_target_directory: bool,
64}
65
66impl Default for CpConfig {
67    fn default() -> Self {
68        Self {
69            recursive: false,
70            force: false,
71            interactive: false,
72            no_clobber: false,
73            verbose: false,
74            preserve_mode: false,
75            preserve_ownership: false,
76            preserve_timestamps: false,
77            dereference: DerefMode::CommandLine,
78            link: false,
79            symbolic_link: false,
80            update: false,
81            one_file_system: false,
82            backup: None,
83            suffix: "~".to_string(),
84            reflink: ReflinkMode::Auto,
85            target_directory: None,
86            no_target_directory: false,
87        }
88    }
89}
90
91/// Parse a `--backup=CONTROL` value.
92pub fn parse_backup_mode(s: &str) -> Result<BackupMode, String> {
93    match s {
94        "none" | "off" => Ok(BackupMode::None),
95        "numbered" | "t" => Ok(BackupMode::Numbered),
96        "existing" | "nil" => Ok(BackupMode::Existing),
97        "simple" | "never" => Ok(BackupMode::Simple),
98        _ => Err(format!("invalid backup type '{}'", s)),
99    }
100}
101
102/// Parse a `--reflink[=WHEN]` value.
103pub fn parse_reflink_mode(s: &str) -> Result<ReflinkMode, String> {
104    match s {
105        "auto" => Ok(ReflinkMode::Auto),
106        "always" => Ok(ReflinkMode::Always),
107        "never" => Ok(ReflinkMode::Never),
108        _ => Err(format!("invalid reflink value '{}'", s)),
109    }
110}
111
112/// Parse a `--preserve[=LIST]` attribute list.
113///
114/// Supports: mode, ownership, timestamps, links, context, xattr, all.
115pub fn apply_preserve(list: &str, config: &mut CpConfig) {
116    for attr in list.split(',') {
117        match attr.trim() {
118            "mode" => config.preserve_mode = true,
119            "ownership" => config.preserve_ownership = true,
120            "timestamps" => config.preserve_timestamps = true,
121            "links" | "context" | "xattr" => { /* acknowledged but not yet implemented */ }
122            "all" => {
123                config.preserve_mode = true;
124                config.preserve_ownership = true;
125                config.preserve_timestamps = true;
126            }
127            _ => {}
128        }
129    }
130}
131
132// ---- backup helpers ----
133
134/// Create a backup of `dst` if it exists, according to the configured backup mode.
135/// Returns `Ok(())` when no backup is needed or the backup was made successfully.
136fn make_backup(dst: &Path, config: &CpConfig) -> io::Result<()> {
137    let mode = match config.backup {
138        Some(m) => m,
139        None => return Ok(()),
140    };
141    if mode == BackupMode::None {
142        return Ok(());
143    }
144    if !dst.exists() {
145        return Ok(());
146    }
147
148    let backup_path = match mode {
149        BackupMode::Simple | BackupMode::None => {
150            let mut p = dst.as_os_str().to_os_string();
151            p.push(&config.suffix);
152            std::path::PathBuf::from(p)
153        }
154        BackupMode::Numbered => numbered_backup_path(dst),
155        BackupMode::Existing => {
156            // Use numbered if any numbered backup already exists.
157            let numbered = numbered_backup_candidate(dst, 1);
158            if numbered.exists() {
159                numbered_backup_path(dst)
160            } else {
161                let mut p = dst.as_os_str().to_os_string();
162                p.push(&config.suffix);
163                std::path::PathBuf::from(p)
164            }
165        }
166    };
167
168    std::fs::rename(dst, &backup_path)?;
169    Ok(())
170}
171
172fn numbered_backup_path(dst: &Path) -> std::path::PathBuf {
173    let mut n: u64 = 1;
174    loop {
175        let candidate = numbered_backup_candidate(dst, n);
176        if !candidate.exists() {
177            return candidate;
178        }
179        n += 1;
180    }
181}
182
/// Build the `n`-th numbered backup name for `dst` by appending `.~n~` to
/// the path's raw OS string (no filesystem access).
fn numbered_backup_candidate(dst: &Path, n: u64) -> std::path::PathBuf {
    let suffix = format!(".~{}~", n);
    let base = dst.as_os_str();
    let mut name = std::ffi::OsString::with_capacity(base.len() + suffix.len());
    name.push(base);
    name.push(suffix);
    name.into()
}
188
189// ---- attribute preservation ----
190
191/// Preserve file attributes (mode, timestamps, ownership) on `dst` using
192/// pre-fetched source metadata (avoids redundant stat calls).
193fn preserve_attributes_from_meta(
194    meta: &std::fs::Metadata,
195    dst: &Path,
196    config: &CpConfig,
197) -> io::Result<()> {
198    // Only chmod when -p/--preserve=mode is set. Without it, the destination
199    // keeps its O_CREAT permissions (source_mode & ~umask), matching GNU cp.
200    #[cfg(unix)]
201    if config.preserve_mode {
202        let mode = meta.mode();
203        std::fs::set_permissions(dst, std::fs::Permissions::from_mode(mode))?;
204    }
205
206    #[cfg(unix)]
207    if config.preserve_timestamps {
208        let atime_spec = libc::timespec {
209            tv_sec: meta.atime(),
210            tv_nsec: meta.atime_nsec(),
211        };
212        let mtime_spec = libc::timespec {
213            tv_sec: meta.mtime(),
214            tv_nsec: meta.mtime_nsec(),
215        };
216        let times = [atime_spec, mtime_spec];
217        // SAFETY: CString::new checks for interior NULs; the path is valid UTF-8/bytes.
218        let c_path = std::ffi::CString::new(dst.as_os_str().as_encoded_bytes())
219            .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
220        // SAFETY: c_path is a valid NUL-terminated C string, times is a valid [timespec; 2].
221        let ret = unsafe { libc::utimensat(libc::AT_FDCWD, c_path.as_ptr(), times.as_ptr(), 0) };
222        if ret != 0 {
223            return Err(io::Error::last_os_error());
224        }
225    }
226
227    #[cfg(unix)]
228    if config.preserve_ownership {
229        // SAFETY: CString::new checks for interior NULs; the path is valid bytes.
230        let c_path = std::ffi::CString::new(dst.as_os_str().as_encoded_bytes())
231            .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
232        // SAFETY: c_path is a valid NUL-terminated C string, uid/gid are valid u32 values.
233        let ret = unsafe { libc::lchown(c_path.as_ptr(), meta.uid(), meta.gid()) };
234        if ret != 0 {
235            // Ownership preservation may fail for non-root; ignore EPERM.
236            let err = io::Error::last_os_error();
237            if err.raw_os_error() != Some(libc::EPERM) {
238                return Err(err);
239            }
240        }
241    }
242
243    // Suppress unused-variable warnings on non-unix platforms.
244    #[cfg(not(unix))]
245    {
246        let _ = (meta, config);
247    }
248
249    Ok(())
250}
251
252// ---- large-buffer fallback copy ----
253
/// Copy file data using a thread-local buffer (up to 4MB, capped to file size).
/// Avoids stdlib's 64KB default buffer and amortizes allocation across files.
/// Creates the destination with `src_mode` so the kernel applies the process umask.
///
/// * `src` / `dst` — source is opened read-only; destination is created or
///   truncated.
/// * `src_len` — size hint used only to pick the buffer size (clamped to
///   8 KiB ..= 4 MiB). The copy itself runs until end-of-file, so a wrong
///   hint affects performance only.
/// * `src_mode` — creation mode for a new destination on unix; ignored on
///   other platforms and for destinations that already exist.
///
/// # Errors
///
/// Returns any error from opening either file, reading, or writing.
/// `ErrorKind::Interrupted` from a read is retried rather than reported.
fn copy_data_large_buf(src: &Path, dst: &Path, src_len: u64, src_mode: u32) -> io::Result<()> {
    use std::cell::RefCell;
    use std::io::{Read, Write};
    const MAX_BUF: usize = 4 * 1024 * 1024; // 4 MB
    /// Shrink the thread-local buffer when it exceeds this size and the current
    /// file needs much less, to avoid holding 4 MB per Rayon thread permanently.
    const SHRINK_THRESHOLD: usize = 512 * 1024; // 512 KB

    thread_local! {
        static BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
    }

    // Safe on 32-bit: clamp via u64 before casting to usize.
    let buf_size = src_len.min(MAX_BUF as u64).max(8192) as usize;

    let mut reader = std::fs::File::open(src)?;
    let mut opts = std::fs::OpenOptions::new();
    opts.write(true).create(true).truncate(true);
    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        opts.mode(src_mode);
    }
    #[cfg(not(unix))]
    let _ = src_mode;
    let mut writer = opts.open(dst)?;

    BUF.with(|cell| {
        let mut buf = cell.borrow_mut();
        // Shrink if buffer is much larger than needed to limit per-thread memory.
        if buf.len() > SHRINK_THRESHOLD && buf_size < buf.len() / 4 {
            buf.resize(buf_size, 0);
            buf.shrink_to_fit();
        } else if buf.len() < buf_size {
            buf.resize(buf_size, 0);
        }
        // Invariant from here on: buf.len() >= buf_size, so the slice below
        // cannot go out of bounds.
        loop {
            let n = match reader.read(&mut buf[..buf_size]) {
                Ok(0) => break,
                Ok(n) => n,
                // A signal interrupted the read before any data arrived;
                // this is non-fatal and the read should simply be retried.
                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            // `write_all` already retries interrupted writes internally.
            writer.write_all(&buf[..n])?;
        }
        Ok(())
    })
}
303
304// ---- Linux copy_file_range optimisation ----
305
306#[cfg(target_os = "linux")]
307fn copy_file_range_linux(src: &Path, dst: &Path, src_mode: u32) -> io::Result<()> {
308    use std::os::unix::fs::OpenOptionsExt;
309    use std::os::unix::io::AsRawFd;
310
311    let src_file = std::fs::File::open(src)?;
312    let src_meta = src_file.metadata()?;
313    let len = src_meta.len();
314
315    let dst_file = std::fs::OpenOptions::new()
316        .write(true)
317        .create(true)
318        .truncate(true)
319        .mode(src_mode)
320        .open(dst)?;
321
322    let mut remaining = len as i64;
323    while remaining > 0 {
324        // Cap to isize::MAX to avoid overflow on 32-bit when casting to usize.
325        let to_copy = (remaining as u64).min(isize::MAX as u64) as usize;
326        // SAFETY: src_file and dst_file are valid open file descriptors;
327        // null offsets mean the kernel uses and updates the file offsets.
328        // Uses raw syscall instead of libc::copy_file_range to support
329        // older glibc versions (e.g. cross-compilation with cross-rs).
330        let ret = unsafe {
331            libc::syscall(
332                libc::SYS_copy_file_range,
333                src_file.as_raw_fd(),
334                std::ptr::null_mut::<libc::off64_t>(),
335                dst_file.as_raw_fd(),
336                std::ptr::null_mut::<libc::off64_t>(),
337                to_copy,
338                0u32,
339            )
340        };
341        if ret < 0 {
342            return Err(io::Error::last_os_error());
343        }
344        if ret == 0 {
345            // EOF before all bytes copied — break to avoid infinite loop
346            break;
347        }
348        remaining -= ret as i64;
349    }
350    Ok(())
351}
352
353// ---- single-file copy ----
354
355/// Copy a single file (or symlink) from `src` to `dst`.
356pub fn copy_file(src: &Path, dst: &Path, config: &CpConfig) -> io::Result<()> {
357    let src_meta = if config.dereference == DerefMode::Always {
358        std::fs::metadata(src)?
359    } else {
360        std::fs::symlink_metadata(src)?
361    };
362
363    copy_file_with_meta(src, dst, &src_meta, config)
364}
365
366/// Copy a single file using pre-fetched metadata (avoids redundant stat).
367fn copy_file_with_meta(
368    src: &Path,
369    dst: &Path,
370    src_meta: &std::fs::Metadata,
371    config: &CpConfig,
372) -> io::Result<()> {
373    // Handle symlink when not dereferencing.
374    if src_meta.file_type().is_symlink() && config.dereference == DerefMode::Never {
375        let target = std::fs::read_link(src)?;
376        #[cfg(unix)]
377        {
378            std::os::unix::fs::symlink(&target, dst)?;
379        }
380        #[cfg(not(unix))]
381        {
382            // Fallback: try a regular copy (symlinks are not portable).
383            let _ = target;
384            std::fs::copy(src, dst)?;
385        }
386        return Ok(());
387    }
388
389    // Hard link mode.
390    if config.link {
391        std::fs::hard_link(src, dst)?;
392        return Ok(());
393    }
394
395    // Symbolic link mode.
396    if config.symbolic_link {
397        #[cfg(unix)]
398        {
399            std::os::unix::fs::symlink(src, dst)?;
400        }
401        #[cfg(not(unix))]
402        {
403            return Err(io::Error::new(
404                io::ErrorKind::Unsupported,
405                "symbolic links are not supported on this platform",
406            ));
407        }
408        return Ok(());
409    }
410
411    // Try reflink (FICLONE ioctl) for instant CoW copy on btrfs/XFS.
412    #[cfg(target_os = "linux")]
413    {
414        if matches!(config.reflink, ReflinkMode::Auto | ReflinkMode::Always) {
415            use std::os::unix::io::AsRawFd;
416            // FICLONE = _IOW(0x94, 9, int) from linux/fs.h
417            const FICLONE: libc::c_ulong = 0x40049409;
418
419            if let Ok(src_file) = std::fs::File::open(src) {
420                let dst_file = std::fs::OpenOptions::new()
421                    .write(true)
422                    .create(true)
423                    .truncate(true)
424                    .open(dst);
425                if let Ok(dst_file) = dst_file {
426                    // SAFETY: Both file descriptors are valid (files are open),
427                    // FICLONE takes an fd as argument, and we check the return value.
428                    let ret =
429                        unsafe { libc::ioctl(dst_file.as_raw_fd(), FICLONE, src_file.as_raw_fd()) };
430                    if ret == 0 {
431                        preserve_attributes_from_meta(src_meta, dst, config)?;
432                        return Ok(());
433                    }
434                    if config.reflink == ReflinkMode::Always {
435                        return Err(io::Error::new(
436                            io::ErrorKind::Unsupported,
437                            format!(
438                                "failed to clone '{}' to '{}': {}",
439                                src.display(),
440                                dst.display(),
441                                io::Error::last_os_error()
442                            ),
443                        ));
444                    }
445                    // Auto mode: fall through to other copy methods
446                }
447            }
448        }
449    }
450
451    // Try Linux copy_file_range for zero-copy.
452    #[cfg(target_os = "linux")]
453    let src_mode_bits = src_meta.mode();
454    #[cfg(target_os = "linux")]
455    {
456        match copy_file_range_linux(src, dst, src_mode_bits) {
457            Ok(()) => {
458                preserve_attributes_from_meta(src_meta, dst, config)?;
459                return Ok(());
460            }
461            Err(e)
462                if matches!(
463                    e.raw_os_error(),
464                    Some(libc::EINVAL | libc::ENOSYS | libc::EXDEV)
465                ) =>
466            {
467                // Unsupported/cross-device — fall through to large-buffer copy
468            }
469            Err(e) => return Err(e),
470        }
471    }
472
473    // Fallback: large-buffer copy (up to 4MB vs stdlib's 64KB).
474    #[cfg(unix)]
475    let mode = src_meta.mode();
476    #[cfg(not(unix))]
477    let mode = 0o666u32;
478    copy_data_large_buf(src, dst, src_meta.len(), mode)?;
479    preserve_attributes_from_meta(src_meta, dst, config)?;
480    Ok(())
481}
482
483// ---- recursive copy ----
484
485/// Recursively copy `src` to `dst`, using parallel file copies within each directory.
486fn copy_recursive(
487    src: &Path,
488    dst: &Path,
489    config: &CpConfig,
490    root_dev: Option<u64>,
491) -> io::Result<()> {
492    let src_meta = std::fs::symlink_metadata(src)?;
493
494    #[cfg(unix)]
495    if config.one_file_system {
496        if let Some(dev) = root_dev {
497            if src_meta.dev() != dev {
498                return Ok(());
499            }
500        }
501    }
502
503    if src_meta.is_dir() {
504        if !dst.exists() {
505            std::fs::create_dir_all(dst)?;
506        }
507
508        #[cfg(unix)]
509        let next_dev = Some(root_dev.unwrap_or(src_meta.dev()));
510        #[cfg(not(unix))]
511        let next_dev: Option<u64> = None;
512
513        // Collect entries and partition into files and directories.
514        let mut files: Vec<(std::path::PathBuf, std::path::PathBuf, std::fs::Metadata)> =
515            Vec::new();
516        let mut dirs: Vec<(std::path::PathBuf, std::path::PathBuf)> = Vec::new();
517
518        for entry in std::fs::read_dir(src)? {
519            let entry = entry?;
520            let child_src = entry.path();
521            let child_dst = dst.join(entry.file_name());
522            // Respect dereference mode: follow symlinks when Always.
523            let meta = if config.dereference == DerefMode::Always {
524                std::fs::metadata(&child_src)?
525            } else {
526                std::fs::symlink_metadata(&child_src)?
527            };
528            // Check --one-file-system for all entries (not just directories).
529            #[cfg(unix)]
530            if config.one_file_system {
531                if let Some(dev) = root_dev {
532                    if meta.dev() != dev {
533                        continue;
534                    }
535                }
536            }
537            if meta.is_dir() {
538                dirs.push((child_src, child_dst));
539            } else {
540                files.push((child_src, child_dst, meta));
541            }
542        }
543
544        /// Minimum number of files before we parallelize copies within a directory.
545        /// Rayon dispatch overhead dominates below this threshold (empirical).
546        const PARALLEL_FILE_THRESHOLD: usize = 8;
547
548        // Copy files in parallel using Rayon when there are enough to benefit.
549        if files.len() >= PARALLEL_FILE_THRESHOLD {
550            use rayon::prelude::*;
551            let result: Result<(), io::Error> =
552                files
553                    .par_iter()
554                    .try_for_each(|(child_src, child_dst, meta)| {
555                        copy_file_with_meta(child_src, child_dst, meta, config)
556                    });
557            result?;
558        } else {
559            for (child_src, child_dst, meta) in &files {
560                copy_file_with_meta(child_src, child_dst, meta, config)?;
561            }
562        }
563
564        // Recurse into subdirectories sequentially (they may create dirs that
565        // need to exist before their children can be copied).
566        for (child_src, child_dst) in &dirs {
567            copy_recursive(child_src, child_dst, config, next_dev)?;
568        }
569
570        // Preserve directory attributes after copying contents.
571        preserve_attributes_from_meta(&src_meta, dst, config)?;
572    } else {
573        // If parent directory does not exist, create it.
574        if let Some(parent) = dst.parent() {
575            if !parent.exists() {
576                std::fs::create_dir_all(parent)?;
577            }
578        }
579        copy_file_with_meta(src, dst, &src_meta, config)?;
580    }
581    Ok(())
582}
583
584// ---- main entry point ----
585
586/// Determine the effective destination and perform the copy.
587///
588/// `sources` is the list of source paths; `raw_dest` is the positional destination
589/// (may be `None` when `--target-directory` is used).
590///
591/// Returns a list of per-file error messages (empty on full success) and a bool
592/// indicating whether any error occurred.
593pub fn run_cp(
594    sources: &[String],
595    raw_dest: Option<&str>,
596    config: &CpConfig,
597) -> (Vec<String>, bool) {
598    let mut errors: Vec<String> = Vec::new();
599    let mut had_error = false;
600
601    // Resolve destination directory.
602    let dest_dir: Option<std::path::PathBuf> = config
603        .target_directory
604        .as_deref()
605        .or(raw_dest)
606        .map(std::path::PathBuf::from);
607
608    let dest_dir = match dest_dir {
609        Some(d) => d,
610        None => {
611            errors.push("cp: missing destination operand".to_string());
612            return (errors, true);
613        }
614    };
615
616    // Multiple sources or target is an existing directory => copy into directory.
617    let copy_into_dir = sources.len() > 1 || dest_dir.is_dir() || config.target_directory.is_some();
618
619    // When -T is set, never treat destination as a directory.
620    let copy_into_dir = copy_into_dir && !config.no_target_directory;
621
622    for source in sources {
623        let src = Path::new(source);
624        let dst = if copy_into_dir {
625            let name = src.file_name().unwrap_or(src.as_ref());
626            dest_dir.join(name)
627        } else {
628            dest_dir.clone()
629        };
630
631        if let Err(e) = do_copy(src, &dst, config) {
632            let msg = format!(
633                "cp: cannot copy '{}' to '{}': {}",
634                src.display(),
635                dst.display(),
636                strip_os_error(&e)
637            );
638            errors.push(msg);
639            had_error = true;
640        } else if config.verbose {
641            // Verbose output goes to stderr to match GNU behavior when piped.
642            eprintln!("'{}' -> '{}'", src.display(), dst.display());
643        }
644    }
645
646    (errors, had_error)
647}
648
649/// Core copy dispatcher for a single source -> destination pair.
650fn do_copy(src: &Path, dst: &Path, config: &CpConfig) -> io::Result<()> {
651    let src_meta = if config.dereference == DerefMode::Always {
652        std::fs::metadata(src)?
653    } else {
654        std::fs::symlink_metadata(src)?
655    };
656
657    // Reject directory source without -R.
658    if src_meta.is_dir() && !config.recursive {
659        return Err(io::Error::new(
660            io::ErrorKind::Other,
661            format!("omitting directory '{}'", src.display()),
662        ));
663    }
664
665    // No-clobber: skip if destination exists.
666    if config.no_clobber && dst.exists() {
667        return Ok(());
668    }
669
670    // Update: skip if destination is same age or newer.
671    if config.update && dst.exists() {
672        if let (Ok(src_m), Ok(dst_m)) = (src.metadata(), dst.metadata()) {
673            if let (Ok(src_t), Ok(dst_t)) = (src_m.modified(), dst_m.modified()) {
674                if dst_t >= src_t {
675                    return Ok(());
676                }
677            }
678        }
679    }
680
681    // Interactive: prompt on stderr.
682    if config.interactive && dst.exists() {
683        eprint!("cp: overwrite '{}'? ", dst.display());
684        let mut response = String::new();
685        io::stdin().read_line(&mut response)?;
686        let r = response.trim().to_lowercase();
687        if !(r == "y" || r == "yes") {
688            return Ok(());
689        }
690    }
691
692    // Force: remove existing destination if it cannot be opened for writing.
693    if config.force && dst.exists() {
694        if let Ok(m) = dst.metadata() {
695            if m.permissions().readonly() {
696                std::fs::remove_file(dst)?;
697            }
698        }
699    }
700
701    // Make backup if requested.
702    make_backup(dst, config)?;
703
704    if src_meta.is_dir() {
705        #[cfg(unix)]
706        let root_dev = Some(src_meta.dev());
707        #[cfg(not(unix))]
708        let root_dev: Option<u64> = None;
709        copy_recursive(src, dst, config, root_dev)
710    } else {
711        copy_file(src, dst, config)
712    }
713}
714
/// Render `e` without the ` (os error N)` text that `io::Error` adds for
/// OS-originated errors, giving GNU-style messages.
///
/// Errors without a raw OS code are rendered unchanged.
fn strip_os_error(e: &io::Error) -> String {
    let text = e.to_string();
    match e.raw_os_error() {
        Some(code) => {
            let marker = format!(" (os error {})", code);
            text.replace(&marker, "")
        }
        None => text,
    }
}