use std::collections::HashSet;
use std::path::Path;
use std::time::{Duration, SystemTime};
use git_lfs_git::fetch_prune::FetchPruneConfig;
use git_lfs_git::scanner::{
scan_index_pointers, scan_pointers, scan_pointers_with_args, scan_previous_versions,
scan_stashed, scan_tree,
};
use git_lfs_pointer::Oid;
use git_lfs_store::Store;
use crate::fetch::{fetch_filter_set, paths_pass_filter};
use crate::fetcher::LfsFetcher;
use crate::push::remote_tracking_refs;
/// Errors that can abort a prune run.
#[derive(Debug, thiserror::Error)]
pub enum PruneError {
/// Error propagated unchanged from the `git_lfs_git` crate.
#[error(transparent)]
Git(#[from] git_lfs_git::Error),
/// Error propagated unchanged from the push command module.
#[error(transparent)]
Push(#[from] crate::push::PushCommandError),
/// Error propagated unchanged from the fetch command module.
#[error(transparent)]
Fetch(#[from] crate::fetch::FetchCommandError),
/// Underlying I/O failure.
#[error(transparent)]
Io(#[from] std::io::Error),
/// Remote verification reported prunable objects that the remote does not
/// have, and the caller did not ask to continue past unverified objects.
#[error(
"prune halted: objects missing on remote (re-run with --when-unverified=continue to drop them from the delete set)"
)]
UnverifiedHalt,
/// The remote verification request itself failed; carries the stringified
/// cause.
#[error("prune verify failed: {0}")]
Verify(String),
}
/// Flags controlling a prune run (see [`run`]).
#[derive(Debug, Clone)]
pub struct Options {
/// Report what would be pruned, then return without deleting anything.
pub dry_run: bool,
/// Additionally list every object in the delete set with its size.
pub verbose: bool,
/// When set, the "recent refs" / "recent commits" retention passes are
/// skipped, so objects they would have kept become prunable.
pub recent: bool,
/// When set, the HEAD / worktree-HEAD tree retention passes and the recent
/// retention passes are skipped.
pub force: bool,
/// Request remote verification of the delete set (can also be enabled by
/// the repository's prune configuration).
pub verify_remote: bool,
/// Disable remote verification; takes precedence over `verify_remote` and
/// over the configuration.
pub no_verify_remote: bool,
/// During remote verification, also treat unverified objects that are not
/// reachable from any ref as missing (can also be enabled by configuration).
pub verify_unreachable: bool,
/// Disable verification of unreachable objects; takes precedence over
/// `verify_unreachable` and over the configuration.
pub no_verify_unreachable: bool,
/// When objects are missing on the remote, drop them from the delete set
/// and carry on instead of failing with `PruneError::UnverifiedHalt`.
pub continue_when_unverified: bool,
}
pub fn run(cwd: &Path, opts: &Options) -> Result<(), PruneError> {
let store = Store::new(git_lfs_git::lfs_dir(cwd)?);
let local_objects = store.each_object()?;
if local_objects.is_empty() {
println!("No local LFS objects to prune.");
return Ok(());
}
let retained = build_retain_set(cwd, opts)?;
let mut prunable: Vec<(Oid, u64)> = Vec::new();
for (oid, size) in &local_objects {
if !retained.contains(oid) {
prunable.push((*oid, *size));
}
}
let local_count = local_objects.len();
let retained_count = local_count - prunable.len();
if prunable.is_empty() {
println!("{local_count} local objects, {retained_count} retained, done.");
return Ok(());
}
let cfg = FetchPruneConfig::from_repo(cwd);
let verify_remote =
!opts.no_verify_remote && (opts.verify_remote || cfg.prune_verify_remote_always);
let verify_unreachable = !opts.no_verify_unreachable
&& (opts.verify_unreachable || cfg.prune_verify_unreachable_always);
let (mut delete_list, missing_on_remote, verify_count) = if verify_remote {
verify_prunable(cwd, &prunable, verify_unreachable)?
} else {
(prunable.clone(), Vec::new(), 0usize)
};
let mut summary = format!("{local_count} local objects, {retained_count} retained");
if verify_count > 0 {
summary.push_str(&format!(", {verify_count} verified with remote"));
}
if !missing_on_remote.is_empty() {
summary.push_str(&format!(", {} not on remote", missing_on_remote.len()));
}
summary.push_str(", done.");
println!("{summary}");
if !missing_on_remote.is_empty() {
println!("These objects to be pruned are missing on remote:");
for oid in &missing_on_remote {
println!(" * {oid}");
}
if !opts.continue_when_unverified {
return Err(PruneError::UnverifiedHalt);
}
let missing: HashSet<&Oid> = missing_on_remote.iter().collect();
delete_list.retain(|(oid, _)| !missing.contains(oid));
}
if delete_list.is_empty() {
return Ok(());
}
let delete_total_size: u64 = delete_list.iter().map(|(_, s)| *s).sum();
if opts.dry_run {
println!(
"{} files would be pruned ({})",
delete_list.len(),
humanize(delete_total_size),
);
if opts.verbose {
for (oid, size) in &delete_list {
println!(" * {oid} ({})", humanize(*size));
}
}
return Ok(());
}
if opts.verbose {
for (oid, size) in &delete_list {
println!(" * {oid} ({})", humanize(*size));
}
}
let total = delete_list.len();
let mut deleted = 0usize;
let mut failed: Vec<(Oid, std::io::Error)> = Vec::new();
for (oid, _) in &delete_list {
let path = store.object_path(*oid);
match std::fs::remove_file(&path) {
Ok(()) => deleted += 1,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
deleted += 1;
}
Err(e) => failed.push((*oid, e)),
}
}
for (oid, e) in &failed {
eprintln!("git-lfs: failed to remove {oid}: {e}");
}
println!("Deleting objects: 100% ({deleted}/{total}), done.");
Ok(())
}
/// Result of remote verification: `(objects safe to delete, object ids missing
/// on the remote, number of objects the server confirmed)`.
type VerifyOutcome = (Vec<(Oid, u64)>, Vec<Oid>, usize);

/// Asks the server which of the `prunable` objects it can serve and splits the
/// candidates accordingly.
///
/// An object the server confirmed is always deletable. An unconfirmed object is
/// reported as missing when `verify_unreachable` is set or when it is
/// referenced by the `--all` pointer scan; otherwise it is still deletable.
///
/// # Errors
///
/// Returns [`PruneError::Verify`] when the server check fails, or any error
/// raised while setting up the fetcher or scanning reachable pointers.
fn verify_prunable(
    cwd: &Path,
    prunable: &[(Oid, u64)],
    verify_unreachable: bool,
) -> Result<VerifyOutcome, PruneError> {
    use git_lfs_api::ObjectSpec;

    let fetcher = LfsFetcher::from_repo(cwd, &Store::new(git_lfs_git::lfs_dir(cwd)?))?;

    let mut specs: Vec<ObjectSpec> = Vec::with_capacity(prunable.len());
    for (oid, size) in prunable {
        specs.push(ObjectSpec {
            oid: oid.to_string(),
            size: *size,
        });
    }

    let downloadable = fetcher
        .check_server_can_download(specs)
        .map_err(|e| PruneError::Verify(e.to_string()))?;
    // Ids the server returned that do not parse as an `Oid` are ignored.
    let mut verified: HashSet<Oid> = HashSet::new();
    for raw in downloadable.iter() {
        if let Ok(oid) = raw.parse::<Oid>() {
            verified.insert(oid);
        }
    }
    let verify_count = verified.len();

    // The reachability scan is only needed when unreachable objects are exempt
    // from verification.
    let reachable: HashSet<Oid> = match verify_unreachable {
        true => HashSet::new(),
        false => scan_reachable_pointers(cwd)?,
    };

    let mut deletable: Vec<(Oid, u64)> = Vec::new();
    let mut missing: Vec<Oid> = Vec::new();
    for &(oid, size) in prunable {
        let confirmed = verified.contains(&oid);
        let must_be_on_remote = verify_unreachable || reachable.contains(&oid);
        if !confirmed && must_be_on_remote {
            missing.push(oid);
        } else {
            deletable.push((oid, size));
        }
    }
    Ok((deletable, missing, verify_count))
}
/// Collects the object ids of every pointer entry returned by the scanner when
/// it is invoked with the extra `--all` argument and no include/exclude refs.
fn scan_reachable_pointers(cwd: &Path) -> Result<HashSet<Oid>, PruneError> {
    let mut oids = HashSet::new();
    for entry in scan_pointers_with_args(cwd, &[], &[], &["--all"])? {
        oids.insert(entry.oid);
    }
    Ok(oids)
}
/// Builds the set of object ids that must NOT be pruned.
///
/// The set is the union of several scans, performed in this order:
/// 1. unless `opts.force`: the tree at `HEAD` and at each worktree's head
///    (each distinct head scanned once);
/// 2. unless `opts.force` or `opts.recent`: the tree at every recent branch,
///    when `fetch_recent_refs_days > 0`;
/// 3. unless `opts.force` or `opts.recent`: previous versions on `HEAD` and the
///    recent branches, when `fetch_recent_commits_days > 0`;
/// 4. index pointers of this checkout (if `HEAD` exists) and of other,
///    non-prunable worktrees;
/// 5. stashed pointers;
/// 6. if `HEAD` exists: pointers found from local branches/tags while excluding
///    the remote-tracking refs of `cfg.prune_remote_name`.
///
/// Scans 1-4 only retain entries whose paths pass the `lfs.fetchinclude` /
/// `lfs.fetchexclude` filter; scans 5 and 6 retain every entry unconditionally.
///
/// # Errors
///
/// Propagates failures from the filter lookup and from the scanners, except
/// where a failure is explicitly defaulted to "nothing found" (noted inline).
fn build_retain_set(cwd: &Path, opts: &Options) -> Result<HashSet<Oid>, PruneError> {
let cfg = FetchPruneConfig::from_repo(cwd);
let include_set = fetch_filter_set(cwd, "lfs.fetchinclude")?;
let exclude_set = fetch_filter_set(cwd, "lfs.fetchexclude")?;
let mut retained: HashSet<Oid> = HashSet::new();
// Retains an entry only if its paths pass the include/exclude filter. The set
// is passed in explicitly so the closure does not hold a mutable borrow of
// `retained` across the direct inserts further down.
let keep = |entry: git_lfs_git::scanner::PointerEntry, retained: &mut HashSet<Oid>| {
if paths_pass_filter(&entry.paths, &include_set, &exclude_set) {
retained.insert(entry.oid);
}
};
let head_present = head_exists(cwd);
let wts = git_lfs_git::refs::worktrees(cwd);
let head_sha = head_present.then(|| current_head_sha(cwd)).flatten();
// Scan 1: trees at HEAD and at each worktree head (skipped entirely by --force).
if !opts.force {
// Heads already scanned, so a worktree sitting on the same commit as HEAD
// (or as another worktree) is not scanned twice.
let mut seen_shas: HashSet<String> = HashSet::new();
if head_present {
for entry in scan_tree(cwd, "HEAD")? {
keep(entry, &mut retained);
}
if let Some(sha) = &head_sha {
seen_shas.insert(sha.clone());
}
}
for wt in &wts {
let Some(head) = wt.head.as_deref() else {
continue;
};
// `insert` returns false when the value was already present.
if !seen_shas.insert(head.to_owned()) {
continue;
}
for entry in scan_tree(cwd, head)? {
keep(entry, &mut retained);
}
}
}
// Both "recent" passes are disabled by either --force or --recent.
let do_recent = !opts.force && !opts.recent;
// Refs whose history is walked by the recent-commits pass: HEAD (if any)
// plus every recent branch discovered below.
let mut anchors: Vec<String> = if head_present {
vec!["HEAD".to_owned()]
} else {
Vec::new()
};
// Scan 2: trees at recent branches.
if do_recent && cfg.fetch_recent_refs_days > 0 {
let day = Duration::from_secs(86_400);
// The prune window is the fetch window widened by the configured offset.
let prune_ref_days = cfg.fetch_recent_refs_days + cfg.prune_offset_days;
let since = SystemTime::now() - day * prune_ref_days as u32;
let recent = git_lfs_git::refs::recent_branches(
cwd,
since,
cfg.fetch_recent_refs_include_remotes,
None,
)?;
for r in recent {
if !anchors.contains(&r.full) {
anchors.push(r.full.clone());
}
for entry in scan_tree(cwd, &r.full)? {
keep(entry, &mut retained);
}
}
}
// Scan 3: previous versions within the recent-commits window of each anchor.
if do_recent && cfg.fetch_recent_commits_days > 0 {
let day = Duration::from_secs(86_400);
let prune_commit_days = cfg.fetch_recent_commits_days + cfg.prune_offset_days;
for r in &anchors {
// Anchors whose tip commit time cannot be determined are skipped.
let Some(tip_unix) = ref_tip_unix(cwd, r) else {
continue;
};
// The window is measured back from the anchor's tip commit time, not
// from the current time.
let commits_since = SystemTime::UNIX_EPOCH + Duration::from_secs(tip_unix as u64)
- day * prune_commit_days as u32;
for entry in scan_previous_versions(cwd, r, commits_since)? {
keep(entry, &mut retained);
}
}
}
// Scan 4a: index pointers of this checkout, relative to HEAD.
if head_present {
for entry in scan_index_pointers(cwd, "HEAD")? {
keep(entry, &mut retained);
}
}
// Scan 4b: index pointers of the other worktrees.
for wt in &wts {
if wt.prunable {
continue;
}
// This checkout was already handled by scan 4a.
if wt.dir == cwd {
continue;
}
let Some(head) = wt.head.as_deref() else {
continue;
};
// A failing scan of another worktree is treated as "no entries" instead of
// aborting the whole prune.
for entry in scan_index_pointers(&wt.dir, head).unwrap_or_default() {
keep(entry, &mut retained);
}
}
// Scan 5: stashed pointers are retained regardless of the path filter.
for entry in scan_stashed(cwd)? {
retained.insert(entry.oid);
}
// Scan 6: pointers found from local refs but not from the prune remote's
// tracking refs — presumably the objects that have not been pushed yet
// (NOTE(review): confirm against `scan_pointers`). Retained unfiltered.
if head_present {
// Failing to list remote-tracking refs means nothing is excluded.
let excludes = remote_tracking_refs(cwd, &cfg.prune_remote_name).unwrap_or_default();
// Failing to run the ref listing falls back to scanning from HEAD only.
let includes = local_branches_and_tags(cwd).unwrap_or_else(|_| vec!["HEAD".to_owned()]);
if !includes.is_empty() {
let include_refs: Vec<&str> = includes.iter().map(String::as_str).collect();
let exclude_refs: Vec<&str> = excludes.iter().map(String::as_str).collect();
for entry in scan_pointers(cwd, &include_refs, &exclude_refs)? {
retained.insert(entry.oid);
}
}
}
Ok(retained)
}
/// Lists the full names of all refs under `refs/heads/` and `refs/tags/` by
/// running `git for-each-ref` in `cwd`.
///
/// A non-zero exit status from git yields an empty list, not an error.
///
/// # Errors
///
/// Returns the I/O error if the `git` process cannot be run.
fn local_branches_and_tags(cwd: &Path) -> std::io::Result<Vec<String>> {
    let mut git = std::process::Command::new("git");
    git.arg("-C")
        .arg(cwd)
        .arg("for-each-ref")
        .arg("--format=%(refname)")
        .arg("refs/heads/")
        .arg("refs/tags/");
    let output = git.output()?;

    let mut refs = Vec::new();
    if output.status.success() {
        for line in String::from_utf8_lossy(&output.stdout).lines() {
            if !line.is_empty() {
                refs.push(line.to_owned());
            }
        }
    }
    Ok(refs)
}
/// Reports whether `git rev-parse --verify --quiet HEAD` succeeds in `cwd`.
///
/// Returns `false` both when git exits non-zero and when git cannot be run.
fn head_exists(cwd: &Path) -> bool {
    let mut git = std::process::Command::new("git");
    git.arg("-C")
        .arg(cwd)
        .arg("rev-parse")
        .arg("--verify")
        .arg("--quiet")
        .arg("HEAD");
    match git.output() {
        Ok(output) => output.status.success(),
        Err(_) => false,
    }
}
/// Returns the trimmed output of `git rev-parse HEAD` run in `cwd`.
///
/// Yields `None` when git cannot be run, exits non-zero, or prints nothing.
fn current_head_sha(cwd: &Path) -> Option<String> {
    let output = std::process::Command::new("git")
        .arg("-C")
        .arg(cwd)
        .arg("rev-parse")
        .arg("HEAD")
        .output()
        .ok()
        .filter(|o| o.status.success())?;
    let text = String::from_utf8_lossy(&output.stdout);
    let sha = text.trim();
    (!sha.is_empty()).then(|| sha.to_owned())
}
/// Returns the committer timestamp (`%ct`) of the newest commit at `reference`,
/// as printed by `git log -1` run in `cwd`.
///
/// Yields `None` when git cannot be run, exits non-zero, or prints something
/// that does not parse as an `i64`.
fn ref_tip_unix(cwd: &Path, reference: &str) -> Option<i64> {
    let output = std::process::Command::new("git")
        .arg("-C")
        .arg(cwd)
        .arg("log")
        .arg("-1")
        .arg("--format=%ct")
        .arg(reference)
        .output()
        .ok()
        .filter(|o| o.status.success())?;
    let text = String::from_utf8_lossy(&output.stdout);
    text.trim().parse().ok()
}
/// Formats a byte count for display using 1024-based steps.
///
/// Counts below 1024 are printed exactly (`"512 B"`). Larger counts are scaled
/// to the biggest fitting unit and printed with two decimals (`"1.50 KB"`).
/// `PB` is the largest unit, so very large counts print as e.g. `"16384.00 PB"`.
///
/// A value that would round up to `1024.00` of a unit is shown as `1.00` of the
/// next unit instead (1_048_575 bytes is `"1.00 MB"`, not `"1024.00 KB"`).
fn humanize(n: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB", "PB"];
    const STEP: f64 = 1024.0;

    if n < 1024 {
        return format!("{n} B");
    }

    let mut value = n as f64;
    let mut unit = 0;
    while value >= STEP && unit + 1 < UNITS.len() {
        value /= STEP;
        unit += 1;
    }

    // The loop stops once `value < 1024`, but two-decimal rounding can still
    // lift e.g. 1023.999 to "1024.00". Promote to the next unit in that case.
    let rounded = (value * 100.0).round() / 100.0;
    if rounded >= STEP && unit + 1 < UNITS.len() {
        value /= STEP;
        unit += 1;
    }

    format!("{value:.2} {}", UNITS[unit])
}