Skip to main content

mkit_cli/commands/
clean.rs

1//! `mkit clean` — remove untracked files from the worktree (like
2//! `git clean`).
3//!
4//! Safety: this is destructive, so — matching git's `clean.requireForce`
5//! default — it **refuses to delete anything** unless `-f`/`--force` is
6//! given; `-n`/`--dry-run` previews instead. Without `-d`, untracked
7//! *directories* are left alone (git semantics). Ignored files are kept
8//! unless `-x` (also remove ignored) or `-X` (remove *only* ignored).
9//!
10//! Ignore matching uses the shared path-aware matcher (`.gitignore` +
11//! `.mkitignore`, #256), so `-x`/`-X` honor anchored/`**`/multi-segment
12//! patterns and a file under an ignored directory counts as ignored.
13
14use std::io::Write;
15use std::path::{Path, PathBuf};
16
17use clap::Parser;
18use mkit_core::ignore::{self, IgnoreList};
19use mkit_core::index::Index;
20use mkit_core::store::ObjectStore;
21
22use crate::clap_shim;
23use crate::exit;
24
// Command-line options for `mkit clean`, parsed through clap's derive API
// (`run` hands the raw arguments to `clap_shim::parse::<CleanOpts>`).
//
// NOTE(review): the `///` comments on the fields below are doc comments on a
// clap-derived struct; clap's derive normally turns those into each option's
// help text, so treat them as user-facing strings rather than free-form
// documentation — confirm against the pinned clap version before rewording.
#[derive(Debug, Parser)]
#[command(
    name = "mkit clean",
    about = "Remove untracked files from the worktree."
)]
#[allow(clippy::struct_excessive_bools)] // clap option flags, not a state machine
struct CleanOpts {
    // `-n` / `--dry-run`. `run` accepts this in place of `-f`: victims are
    // printed as "Would remove …" and nothing is deleted.
    /// Dry run: list what would be removed without deleting anything.
    #[arg(short = 'n', long = "dry-run")]
    dry_run: bool,
    // `-f` / `--force` (bare `long` derives the long name from the field).
    // `run` refuses to proceed when neither this nor `dry_run` is set.
    /// Actually delete. Required (or `-n`) — clean refuses otherwise.
    #[arg(short = 'f', long)]
    force: bool,
    // `-d` only: no `long` is declared, so there is no long spelling. Without
    // it `collect_dir` skips untracked directories entirely.
    /// Also remove untracked directories.
    #[arg(short = 'd')]
    directories: bool,
    // `-x`. Declared mutually exclusive with `-X` via `conflicts_with`.
    /// Also remove ignored files (not just untracked ones).
    #[arg(short = 'x', conflicts_with = "only_ignored")]
    ignored_too: bool,
    // `-X`. When set, `collect_dir` selects an untracked entry only if it is
    // ignored.
    /// Remove ONLY ignored files.
    #[arg(short = 'X')]
    only_ignored: bool,
    // No `#[arg]` attribute: presumably collected by clap as trailing
    // positional arguments — confirm. `run` normalizes each entry with
    // `normalize_pathspec` before filtering.
    /// Optional pathspecs limiting what is cleaned.
    paths: Vec<String>,
}
50
/// One worktree entry slated for removal.
///
/// Produced by the worktree scan and consumed by the removal loop, which
/// prints `display` and deletes `abs`. Derives `Debug` (like `CleanOpts`) so
/// a victim can be logged or shown in assertion messages.
#[derive(Debug)]
struct Victim {
    /// Display path (git appends `/` to directories). This is the
    /// `/`-separated path relative to the scan root; it is also the sort key
    /// and the string pathspecs are matched against.
    display: String,
    /// Filesystem path actually passed to the removal call (scan root joined
    /// with the relative path).
    abs: PathBuf,
    /// `true` when the entry is a directory and must be removed as a whole
    /// tree; `false` for anything removed as a single file.
    is_dir: bool,
}
58
59#[must_use]
60pub fn run(args: &[String]) -> u8 {
61    let opts = match clap_shim::parse::<CleanOpts>("mkit clean", args) {
62        Ok(o) => o,
63        Err(code) => return code,
64    };
65    let cwd = match std::env::current_dir() {
66        Ok(p) => p,
67        Err(e) => return emit_err(&format!("cwd: {e}"), exit::NOINPUT),
68    };
69    let store = match ObjectStore::open(&cwd) {
70        Ok(s) => s,
71        Err(e) => return emit_err(&format!("not a mkit repo: {e}"), exit::GENERAL_ERROR),
72    };
73    // Safety: never delete without an explicit -f, mirroring git's
74    // `clean.requireForce`. `-n` previews without deleting.
75    if !opts.force && !opts.dry_run {
76        return emit_err(
77            "refusing to clean without -f (use -n to preview, -f to delete)",
78            exit::GENERAL_ERROR,
79        );
80    }
81    let _lock = match super::acquire_worktree_lock(&cwd) {
82        Ok(l) => l,
83        Err(code) => return code,
84    };
85    let index = match super::read_or_seed_index_from_head(&cwd, &store) {
86        Ok(i) => i,
87        Err(e) => return emit_err(&e, exit::GENERAL_ERROR),
88    };
89    let ignore = match ignore::load(&cwd) {
90        Ok(i) => i,
91        Err(e) => return emit_err(&format!("read ignore file: {e}"), exit::GENERAL_ERROR),
92    };
93
94    let mut victims: Vec<Victim> = match collect_dir(&cwd, &cwd, "", false, &index, &ignore, &opts)
95    {
96        Ok((_root_fully_removable, v)) => v,
97        Err(e) => return emit_err(&format!("scan worktree: {e}"), exit::GENERAL_ERROR),
98    };
99
100    // Pathspec filter (repo-relative match-or-descend), if any. A `.` or
101    // empty pathspec means "everything under cwd" and is skipped.
102    let specs: Vec<String> = opts
103        .paths
104        .iter()
105        .map(|p| normalize_pathspec(p))
106        .filter(|s| !s.is_empty())
107        .collect();
108    let match_all = opts.paths.iter().any(|p| {
109        let n = normalize_pathspec(p);
110        n.is_empty()
111    });
112    if !specs.is_empty() && !match_all {
113        victims.retain(|v| {
114            let p = v.display.strip_suffix('/').unwrap_or(&v.display);
115            specs
116                .iter()
117                .any(|s| super::index_path_matches_or_descends(p, s))
118        });
119    }
120
121    // Deterministic, git-like ordering.
122    victims.sort_by(|a, b| a.display.cmp(&b.display));
123
124    let mut out = std::io::stdout().lock();
125    for v in &victims {
126        if opts.dry_run {
127            let _ = writeln!(out, "Would remove {}", v.display);
128            continue;
129        }
130        if let Err(e) = remove(&v.abs, v.is_dir) {
131            return emit_err(&format!("remove {}: {e}", v.display), exit::GENERAL_ERROR);
132        }
133        let _ = writeln!(out, "Removing {}", v.display);
134    }
135    exit::OK
136}
137
/// Recursively gather removal candidates under `dir`. Returns
/// `(fully_removable, victims)`: `fully_removable` is true when nothing
/// inside the directory survives a clean, so a caller may collapse the
/// whole subtree to a single `dir/` victim; otherwise `victims` are the
/// individual removable entries within it.
///
/// Matches git: a **nested repository** (a subdirectory containing
/// `.mkit`/`.git`) is left untouched — git only removes one with the
/// double-force `-ff`, which mkit doesn't offer. **Ignored files are
/// kept** (unless `-x`) and keep their parent directory alive. So a
/// directory is removed wholesale only when every entry under it is itself
/// removable.
///
/// Parameters:
/// - `root`: base directory; each entry's absolute path is `root.join(path)`.
/// - `dir`: the directory listed by this call.
/// - `prefix`: `/`-separated path of `dir` relative to `root`; empty for the
///   top-level call, which also disables the nested-repo guard.
/// - `parent_ignored`: whether an ancestor directory was already ignored;
///   OR-ed into every entry's ignored bit.
/// - `index`: consulted to decide whether a path (or something beneath it)
///   is tracked.
/// - `ignore`: ignore matcher queried with the relative path and `is_dir`.
/// - `opts`: only `only_ignored`, `ignored_too` and `directories` are read.
///
/// This function only collects; it deletes nothing itself.
///
/// # Errors
///
/// Propagates I/O errors from listing `dir` (except `NotFound`, which is
/// reported as an empty, fully removable directory), from reading an
/// individual directory entry, and from `symlink_metadata` on an entry.
fn collect_dir(
    root: &Path,
    dir: &Path,
    prefix: &str,
    parent_ignored: bool,
    index: &Index,
    ignore: &IgnoreList,
    opts: &CleanOpts,
) -> std::io::Result<(bool, Vec<Victim>)> {
    // Nested-repo protection. The repo root always has its own `.mkit`, so
    // only guard SUBdirectories (prefix non-empty). Returning `false` with no
    // victims makes the caller keep this directory and everything in it.
    if !prefix.is_empty() && (dir.join(".mkit").exists() || dir.join(".git").exists()) {
        return Ok((false, Vec::new()));
    }
    let read = match std::fs::read_dir(dir) {
        Ok(r) => r,
        // The directory vanished before it could be listed: nothing in it
        // can survive, so report it as empty and fully removable.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok((true, Vec::new())),
        Err(e) => return Err(e),
    };
    let mut victims: Vec<Victim> = Vec::new();
    // Starts optimistic; any entry that is kept below flips it to false.
    let mut fully_removable = true;
    for entry in read {
        let entry = entry?;
        let name = entry.file_name();
        // Non-UTF-8 names cannot be expressed as a relative path string here,
        // so such entries are never removed and keep the directory alive.
        let Some(name) = name.to_str() else {
            fully_removable = false;
            continue;
        };
        // ASCII-case-insensitive comparison, so e.g. `.GIT` is protected too.
        if name.eq_ignore_ascii_case(".mkit") || name.eq_ignore_ascii_case(".git") {
            fully_removable = false; // repo metadata stays
            continue;
        }
        // Relative path of this entry, always `/`-separated.
        let path = if prefix.is_empty() {
            name.to_string()
        } else {
            format!("{prefix}/{name}")
        };
        let abs = root.join(&path);
        // A symlink is treated as a file (never followed/recursed).
        let is_dir = std::fs::symlink_metadata(&abs)?.is_dir();
        // A path under an ignored directory is ignored too (git "can't
        // re-include under an excluded dir"); OR in the inherited bit. This
        // must be computed BEFORE the tracked check so a tracked-but-ignored
        // directory (e.g. node_modules/ with a tracked file inside) still
        // propagates the ignored bit to its untracked descendants.
        let ignored = parent_ignored || ignore.is_ignored(&path, is_dir);

        // A directory shadowing a path tracked as a *file* is not untracked
        // content: git reports only the tracked-side deletion and suppresses
        // the directory's contents (#288). Skip the whole subtree — this must
        // precede the `index_tracks_path_or_descendant` branch below, which
        // would otherwise treat `f` as a tracked-descendant and descend into
        // `f/`, deleting `f/child`. The dir stays (shadows a tracked path), so
        // clear `fully_removable`.
        if is_dir && index.has_tracked_file_at(&path) {
            fully_removable = false;
            continue;
        }

        if super::index_tracks_path_or_descendant(index, &path) {
            // Tracked content keeps the dir alive; descend into a tracked
            // directory to clean any untracked files inside it, carrying the
            // ignored bit so ignored untracked descendants are kept.
            fully_removable = false;
            if is_dir {
                // The subtree's own verdict is irrelevant: a directory with
                // tracked content is never collapsed into one victim.
                let (_full, sub) = collect_dir(root, &abs, &path, ignored, index, ignore, opts)?;
                victims.extend(sub);
            }
            continue;
        }

        // Untracked. `-X` keeps only ignored entries; otherwise keep
        // non-ignored entries and ignored ones only with `-x`.
        let include = if opts.only_ignored {
            ignored
        } else {
            !ignored || opts.ignored_too
        };

        if is_dir {
            if !opts.directories {
                fully_removable = false; // untracked dirs need -d
                continue;
            }
            let (sub_full, sub) = collect_dir(root, &abs, &path, ignored, index, ignore, opts)?;
            if sub_full && include {
                // The whole subtree is removable → one `dir/` victim. The
                // per-entry victims in `sub` are dropped in favor of it.
                victims.push(Victim {
                    display: format!("{path}/"),
                    abs,
                    is_dir: true,
                });
            } else {
                // Some entries survive (ignored / nested repo) → keep the
                // directory, remove only its removable contents.
                fully_removable = false;
                victims.extend(sub);
            }
        } else if include {
            victims.push(Victim {
                display: path,
                abs,
                is_dir: false,
            });
        } else {
            fully_removable = false; // kept (ignored) file → dir survives
        }
    }
    Ok((fully_removable, victims))
}
260
/// Deletes one entry from disk: a single file when `is_dir` is false,
/// otherwise the directory at `abs` together with everything beneath it.
///
/// Any I/O error from the underlying `std::fs` call is returned unchanged.
fn remove(abs: &Path, is_dir: bool) -> std::io::Result<()> {
    if !is_dir {
        return std::fs::remove_file(abs);
    }
    std::fs::remove_dir_all(abs)
}
268
/// Converts a user-supplied pathspec into the `/`-separated form used for
/// index paths.
///
/// In order: every `\` becomes `/`, one leading `./` is removed, and one
/// trailing `/` is removed. A result of exactly `.` is mapped to the empty
/// string, so `.`, `./` and the empty pathspec all come back empty — which
/// callers read as "everything under cwd". Only a single prefix and a single
/// suffix are stripped (`a//` yields `a/`, `././a` yields `./a`).
fn normalize_pathspec(spec: &str) -> String {
    let unified = spec.replace('\\', "/");
    let mut rest = unified.as_str();
    if let Some(tail) = rest.strip_prefix("./") {
        rest = tail;
    }
    if let Some(head) = rest.strip_suffix('/') {
        rest = head;
    }
    match rest {
        "." => String::new(),
        other => other.to_owned(),
    }
}
282
/// Prints `error: <msg>` on stderr and hands `code` back unchanged, so a
/// caller can report and return in one expression (`return emit_err(..)`).
///
/// The write is best-effort: a failure to write to stderr is ignored.
fn emit_err(msg: &str, code: u8) -> u8 {
    let _ = writeln!(std::io::stderr().lock(), "error: {msg}");
    code
}