Skip to main content

workon/
copy_untracked.rs

1//! Enhanced file copying with pattern matching and platform optimizations.
2//!
3//! This module provides pattern-based file copying between worktrees with platform-specific
4//! optimizations for efficient copying of large files and directories.
5//!
6//! ## Design
7//!
8//! - Uses `ignore::WalkBuilder` + git index check to enumerate candidate files.
9//! - The walker respects `.gitignore` by default (never enters `node_modules/`, `target/`, etc.).
10//! - With `include_ignored`, gitignore filtering is disabled so ignored files are visited too.
//! - Tracked paths are loaded from the git index into a hash set once, so each file is checked in O(1).
12//! - Patterns filter the candidate list.
13//! - Opt-out ignored file support: `--no-include-ignored` / `workon.copyIncludeIgnored=false`.
14//!
15//! ## Pattern Matching
16//!
17//! Uses standard glob patterns via the `glob` crate:
//! - `*.env` - Any path ending in `.env`, at any depth (`*` is allowed to match `/`)
19//! - `.env*` - All files starting with .env
20//! - `**/*.json` - All JSON files recursively
21//! - `.vscode/` - Entire directory and contents
22//!
23//! Exclude patterns work the same way, checked after include patterns match.
24//! An empty include pattern list means "match all candidates".
25//!
26//! ## Platform Optimizations
27//!
28//! Platform-specific copy-on-write optimizations for large files:
29//! - **macOS**: `clonefile(2)` syscall — instant CoW copies on APFS
30//! - **Linux**: `ioctl(FICLONE)` — CoW copies on btrfs/XFS when supported
31//! - **Other**: Standard `fs::copy` fallback
32//!
33//! ## Behavior
34//!
35//! - Only copies files (directories are skipped, but created as needed for nested files)
36//! - Automatic parent directory creation for nested files
37//! - Skips files that already exist at destination (unless --force)
38//! - Returns list of successfully copied files
39//!
40//! ## Example Usage
41//!
42//! ```bash
43//! # Copy specific patterns (ignored files included by default)
44//! git workon copy --pattern '.env*' --pattern '.vscode/'
45//!
46//! # Configure automatic copying
47//! git config workon.autoCopy true
48//! git config --add workon.copyPattern '.env.local'
49//! git config --add workon.copyPattern 'node_modules/'
50//! git config --add workon.copyExclude '.env.production'
51//! ```
52
53use std::fs;
54use std::path::{Path, PathBuf};
55
56use crate::error::{CopyError, Result};
57
/// Boxed callback invoked for each skipped file: receives the skip reason and the
/// file's path relative to the source root.
type SkipCallback = Box<dyn FnMut(&'static str, &Path)>;

/// Settings that drive [`copy_untracked`].
///
/// Build one with struct-update syntax on top of [`Default::default`]; the default
/// callbacks do nothing, so set them only when progress reporting is wanted.
pub struct CopyOptions<'a> {
    /// Include globs. When the slice is empty every candidate file is selected.
    pub patterns: &'a [String],
    /// Exclude globs, applied to files that passed the include step.
    pub excludes: &'a [String],
    /// When `true`, files already present at the destination are overwritten
    /// instead of being skipped.
    pub force: bool,
    /// When `true`, git-ignored files (e.g., `node_modules/`, `.env.local`) are
    /// candidates too.
    pub include_ignored: bool,
    /// Invoked with the relative path of every file that was copied successfully.
    pub on_copied: Box<dyn FnMut(&Path)>,
    /// Invoked with a reason (`"tracked"` or `"exists"`) and the relative path of
    /// every file that was skipped.
    pub on_skipped: SkipCallback,
}
77
78impl Default for CopyOptions<'_> {
79    fn default() -> Self {
80        Self {
81            patterns: &[],
82            excludes: &[],
83            force: false,
84            include_ignored: true,
85            on_copied: Box::new(|_| {}),
86            on_skipped: Box::new(|_, _| {}),
87        }
88    }
89}
90
91/// Copy only untracked (and optionally ignored) files from source to destination.
92///
93/// Uses `ignore::WalkBuilder` to walk `from_path`, skipping gitignored paths by default
94/// (so `node_modules/`, `target/`, etc. are never entered). With `include_ignored`, gitignore
95/// filtering is disabled and all files are visited. In both cases, tracked files are filtered
96/// out via an O(1) git index lookup.
97pub fn copy_untracked(
98    from_path: &Path,
99    to_path: &Path,
100    options: CopyOptions<'_>,
101) -> Result<Vec<PathBuf>> {
102    let CopyOptions {
103        patterns,
104        excludes,
105        force,
106        include_ignored,
107        mut on_copied,
108        mut on_skipped,
109    } = options;
110
111    let repo = git2::Repository::open(from_path).map_err(|source| CopyError::RepoOpen {
112        path: from_path.to_path_buf(),
113        source,
114    })?;
115
116    // Build a set of tracked paths for O(1) per-file lookup.
117    // Using a HashSet instead of index.get_path() per file avoids a libgit2 quirk:
118    // git_index_get_bypath sets the error buffer even when returning NULL (path not
119    // found), which poisons the next try_call! error message with a stale value.
120    let mut index = repo.index().map_err(|source| CopyError::RepoOpen {
121        path: from_path.to_path_buf(),
122        source,
123    })?;
124    index.read(false).map_err(|source| CopyError::RepoOpen {
125        path: from_path.to_path_buf(),
126        source,
127    })?;
128    let tracked: std::collections::HashSet<Vec<u8>> = index.iter().map(|e| e.path).collect();
129
130    // Compile include patterns once. Empty list = match all.
131    let include_patterns: Vec<glob::Pattern> = patterns
132        .iter()
133        .map(|p| {
134            glob::Pattern::new(p).map_err(|e| CopyError::InvalidGlobPattern {
135                pattern: p.clone(),
136                source: e,
137            })
138        })
139        .collect::<std::result::Result<Vec<_>, CopyError>>()?;
140
141    // Compile exclude patterns once (previously compiled per-file — now O(1) per check).
142    let exclude_patterns: Vec<glob::Pattern> = excludes
143        .iter()
144        .map(|p| {
145            glob::Pattern::new(p).map_err(|e| CopyError::InvalidGlobPattern {
146                pattern: p.clone(),
147                source: e,
148            })
149        })
150        .collect::<std::result::Result<Vec<_>, CopyError>>()?;
151
152    let match_opts = glob::MatchOptions {
153        case_sensitive: true,
154        require_literal_separator: false,
155        require_literal_leading_dot: false,
156    };
157
158    // Build walker. Include hidden files (e.g., .env, .vscode/).
159    // By default, respects .gitignore — never descends into node_modules/, target/, etc.
160    // With include_ignored, disable all git-based filtering to visit ignored files too.
161    let mut builder = ignore::WalkBuilder::new(from_path);
162    builder.hidden(false);
163    if include_ignored {
164        builder
165            .git_ignore(false)
166            .git_global(false)
167            .git_exclude(false);
168    }
169
170    let mut copied_files = Vec::new();
171
172    for entry in builder.build() {
173        let entry = match entry {
174            Ok(e) => e,
175            Err(e) => {
176                log::debug!("Walk error: {}", e);
177                continue;
178            }
179        };
180
181        // Skip directories
182        if entry.file_type().is_none_or(|ft| ft.is_dir()) {
183            continue;
184        }
185
186        let path = entry.path();
187
188        // Get relative path from from_path
189        let rel_path = match path.strip_prefix(from_path) {
190            Ok(p) => p.to_path_buf(),
191            Err(_) => continue,
192        };
193
194        let rel_path_str = match rel_path.to_str() {
195            Some(s) => s,
196            None => continue,
197        };
198
199        // Skip .git entry — in worktrees this is a file (not a directory) containing
200        // a gitdir pointer, so the directory check above doesn't catch it. Copying it
201        // would corrupt the destination worktree's git pointer.
202        if rel_path == Path::new(".git") {
203            continue;
204        }
205
206        // Skip files tracked in the git index (handles `git add -f`'d ignored files correctly)
207        if tracked.contains(rel_path_str.as_bytes()) {
208            on_skipped("tracked", &rel_path);
209            continue;
210        }
211
212        // Apply include patterns (empty = match all)
213        if !include_patterns.is_empty()
214            && !include_patterns
215                .iter()
216                .any(|p| p.matches_with(rel_path_str, match_opts))
217        {
218            continue;
219        }
220
221        // Apply exclude patterns
222        if exclude_patterns
223            .iter()
224            .any(|p| p.matches_with(rel_path_str, match_opts))
225        {
226            continue;
227        }
228
229        let dest_file = to_path.join(&rel_path);
230
231        // Skip if destination exists and not forcing
232        if dest_file.exists() && !force {
233            on_skipped("exists", &rel_path);
234            continue;
235        }
236
237        // Create parent directories if needed
238        if let Some(parent) = dest_file.parent() {
239            fs::create_dir_all(parent)?;
240        }
241
242        copy_file_platform(path, &dest_file)?;
243        on_copied(&rel_path);
244        copied_files.push(rel_path);
245    }
246
247    Ok(copied_files)
248}
249
/// Copy a single file, preferring a copy-on-write clone where the platform offers one.
///
/// Direct syscalls are used so that no subprocess is spawned per file:
/// - macOS: `clonefile(2)` for instant CoW on APFS; falls back to `fs::copy`
/// - Linux: `ioctl(FICLONE)` for CoW on btrfs/XFS; falls back to `fs::copy`
/// - Other: `fs::copy`
///
/// This is the macOS variant. It fails with `CopyError::CopyFailed` when either path
/// contains an interior NUL byte or when the fallback `fs::copy` fails.
#[cfg(target_os = "macos")]
fn copy_file_platform(src: &Path, dest: &Path) -> Result<()> {
    use std::ffi::CString;
    use std::os::unix::ffi::OsStrExt;

    // All failures of this function are reported for the same src/dest pair.
    let failure = |source: std::io::Error| CopyError::CopyFailed {
        src: src.to_path_buf(),
        dest: dest.to_path_buf(),
        source,
    };

    // A path with an interior NUL cannot be handed to the C API.
    let to_c_path = |p: &Path| {
        CString::new(p.as_os_str().as_bytes())
            .map_err(|_| failure(std::io::Error::from(std::io::ErrorKind::InvalidInput)))
    };

    let src_c = to_c_path(src)?;
    let dest_c = to_c_path(dest)?;

    // clonefile(2): instant CoW copy on APFS; fails on non-APFS or cross-device.
    // SAFETY: both pointers come from live `CString`s, so they are valid, NUL-terminated
    // C strings for the whole duration of the call.
    let status = unsafe { libc::clonefile(src_c.as_ptr(), dest_c.as_ptr(), 0) };
    if status == 0 {
        return Ok(());
    }

    // Clone not possible (non-APFS, cross-filesystem, etc.): do a regular copy.
    match fs::copy(src, dest) {
        Ok(_) => Ok(()),
        Err(e) => Err(failure(e).into()),
    }
}
287
288#[cfg(target_os = "linux")]
289fn copy_file_platform(src: &Path, dest: &Path) -> Result<()> {
290    use std::fs::{File, OpenOptions};
291    use std::os::unix::io::AsRawFd;
292
293    // FICLONE ioctl: _IOW(0x94, 9, int) = 0x40049409
294    // Performs a reflink copy on btrfs/XFS; fails on unsupported filesystems
295    const FICLONE: libc::c_ulong = 0x40049409;
296
297    if let (Ok(src_file), Ok(dest_file)) = (
298        File::open(src),
299        OpenOptions::new()
300            .write(true)
301            .create(true)
302            .truncate(true)
303            .open(dest),
304    ) {
305        if unsafe { libc::ioctl(dest_file.as_raw_fd(), FICLONE, src_file.as_raw_fd()) } == 0 {
306            return Ok(());
307        }
308        // ioctl failed — dest file is open but may be empty, drop before overwriting
309        drop(dest_file);
310    }
311
312    // Fall back to standard copy (non-btrfs/XFS, cross-filesystem, etc.)
313    fs::copy(src, dest)
314        .map(|_| ())
315        .map_err(|e| CopyError::CopyFailed {
316            src: src.to_path_buf(),
317            dest: dest.to_path_buf(),
318            source: e,
319        })
320        .map_err(Into::into)
321}
322
/// Copy a single file on platforms other than macOS and Linux.
///
/// Delegates to `fs::copy` and discards the byte count. A failure is reported as
/// `CopyError::CopyFailed` carrying both paths and the underlying I/O error.
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
fn copy_file_platform(src: &Path, dest: &Path) -> Result<()> {
    match fs::copy(src, dest) {
        Ok(_bytes_copied) => Ok(()),
        Err(io_err) => {
            let failure = CopyError::CopyFailed {
                src: src.to_path_buf(),
                dest: dest.to_path_buf(),
                source: io_err,
            };
            Err(failure.into())
        }
    }
}