// void-cli 0.0.4
//
// CLI for void — anonymous encrypted source control
//! Garbage collection - find and quarantine unreferenced objects.
//!
//! Uses fsck with `find_unreferenced: true` to identify objects that are
//! not referenced by any commit, then moves them to a quarantine directory.
//!
//! # Safety
//!
//! Objects are moved to `.void/gc/` rather than deleted. This prevents
//! data loss from bugs in the reachability analysis. The quarantine
//! directory can be manually deleted after verification.
//!
//! TODO: Once the codebase reaches maturity and the reachability analysis
//! is proven reliable, add a `--purge` flag to permanently delete objects.

use std::fs;
use std::path::Path;

use camino::Utf8PathBuf;
use serde::Serialize;
use void_core::ops::fsck::{self as core_fsck, FsckOptions, FsckWarning};

use crate::context::{open_repo, void_err_to_cli};
use crate::output::{run_command, CliError, CliOptions};

/// Command-line arguments for gc.
#[derive(Debug)]
pub struct GcArgs {
    /// List unreferenced objects without moving.
    ///
    /// When `true`, the command only reports the unreferenced objects and
    /// their total size; nothing is moved into the quarantine directory.
    pub dry_run: bool,
}

/// JSON output for the gc command.
///
/// Field names are serialized in camelCase (e.g. `quarantinedBytes`,
/// `dryRun`, `quarantineDir`).
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GcOutput {
    /// Number of objects quarantined (or would be in dry-run).
    ///
    /// Only objects that were actually moved are counted; in dry-run mode,
    /// only objects whose file metadata could be read are counted.
    pub quarantined: usize,
    /// Total bytes moved to quarantine (or that would be moved in dry-run),
    /// summed from the file sizes of the counted objects.
    pub quarantined_bytes: u64,
    /// CIDs of quarantined/unreferenced objects.
    ///
    /// This is the full list of unreferenced CIDs reported by fsck, so it can
    /// contain more entries than `quarantined` if some objects could not be
    /// moved or measured.
    pub objects: Vec<String>,
    /// Whether this was a dry run.
    pub dry_run: bool,
    /// Path to the quarantine directory.
    pub quarantine_dir: String,
}

/// Returns the path for a CID in the objects directory.
///
/// The result has the shape `<objects_dir>/<prefix>/<cid>`, where `<prefix>`
/// is the first two bytes of the CID.
///
/// If the CID is shorter than two bytes, or byte offset 2 falls inside a
/// multi-byte UTF-8 character, the whole CID is used as the prefix instead of
/// panicking on an invalid slice.
fn object_path(objects_dir: &Utf8PathBuf, cid_str: &str) -> Utf8PathBuf {
    // `str::get` returns `None` for out-of-range or non-boundary ranges,
    // whereas direct slicing would panic on a non-boundary offset.
    let prefix = cid_str.get(..2).unwrap_or(cid_str);
    objects_dir.join(prefix).join(cid_str)
}

/// Returns the quarantine path for a CID.
///
/// The result has the shape `<gc_dir>/<prefix>/<cid>`, where `<prefix>` is
/// the first two bytes of the CID.
///
/// If the CID is shorter than two bytes, or byte offset 2 falls inside a
/// multi-byte UTF-8 character, the whole CID is used as the prefix instead of
/// panicking on an invalid slice.
fn quarantine_path(gc_dir: &Utf8PathBuf, cid_str: &str) -> Utf8PathBuf {
    // `str::get` returns `None` for out-of-range or non-boundary ranges,
    // whereas direct slicing would panic on a non-boundary offset.
    let prefix = cid_str.get(..2).unwrap_or(cid_str);
    gc_dir.join(prefix).join(cid_str)
}

/// Run the gc command.
///
/// Runs fsck with `find_unreferenced` enabled, collects the CID of every
/// `FsckWarning::UnreferencedObject` it reports, and then:
///
/// * in dry-run mode, sums the on-disk size of those objects and lists them;
/// * otherwise, renames each object from the `objects` directory to the
///   matching location under the `gc` directory inside the repository's void
///   directory.
///
/// A failure to quarantine an individual object is reported as a warning and
/// the remaining objects are still processed. Nothing is deleted here.
///
/// # Arguments
///
/// * `cwd` - Current working directory
/// * `args` - GC arguments
/// * `opts` - CLI options
///
/// # Errors
///
/// The command closure fails if the repository cannot be opened or if fsck
/// returns an error; how that failure is surfaced is up to `run_command`.
pub fn run(cwd: &Path, args: GcArgs, opts: &CliOptions) -> Result<(), CliError> {
    run_command("gc", opts, |ctx| {
        ctx.progress("Scanning for unreferenced objects...");

        let repo = open_repo(cwd)?;

        // Run fsck with find_unreferenced enabled. Content hashes are not
        // verified here: gc only needs the reachability information.
        let options = FsckOptions {
            find_unreferenced: true,
            verify_content_hashes: false,
            observer: None,
        };

        let result = core_fsck::fsck(repo.context(), &options).map_err(void_err_to_cli)?;

        // Extract unreferenced CIDs from warnings; other warnings are ignored.
        let unreferenced_cids: Vec<String> = result
            .warnings
            .iter()
            .filter_map(|w| {
                if let FsckWarning::UnreferencedObject { cid } = w {
                    Some(cid.clone())
                } else {
                    None
                }
            })
            .collect();

        let void_dir_utf8 = repo.void_dir().to_owned();
        let objects_dir = void_dir_utf8.join("objects");
        let gc_dir = void_dir_utf8.join("gc");

        // Calculate sizes and optionally quarantine
        let mut quarantined = 0;
        let mut quarantined_bytes: u64 = 0;

        if args.dry_run {
            // Dry run: just calculate sizes. Objects whose metadata cannot be
            // read are skipped from the totals but still listed below.
            for cid_str in &unreferenced_cids {
                let path = object_path(&objects_dir, cid_str);
                if let Ok(metadata) = fs::metadata(path.as_std_path()) {
                    quarantined_bytes += metadata.len();
                    quarantined += 1;
                }
            }

            // Human-readable output
            if !ctx.use_json() {
                if unreferenced_cids.is_empty() {
                    ctx.info("No unreferenced objects found.");
                } else {
                    ctx.info(format!(
                        "Would quarantine {} objects ({} bytes) to {}",
                        quarantined, quarantined_bytes, gc_dir
                    ));
                    ctx.info("(Objects are moved, not deleted. Manually rm -rf the gc dir to reclaim space.)");
                    for cid_str in &unreferenced_cids {
                        // Show at most the first 16 bytes. `str::get` falls
                        // back to the full CID instead of panicking when the
                        // CID is shorter or the cut is not a char boundary.
                        let short_cid = cid_str.get(..16).unwrap_or(cid_str);
                        ctx.info(format!("  {}...", short_cid));
                    }
                }
            }
        } else {
            // Move objects to quarantine directory
            // TODO: Once reachability analysis is proven reliable, add --purge flag
            // to permanently delete objects instead of quarantining.
            if !unreferenced_cids.is_empty() {
                ctx.progress("Quarantining unreferenced objects...");

                for cid_str in &unreferenced_cids {
                    let src_path = object_path(&objects_dir, cid_str);
                    let dst_path = quarantine_path(&gc_dir, cid_str);

                    // Get size before moving; an unreadable size counts as 0
                    // and the rename below reports any real problem.
                    let size = fs::metadata(src_path.as_std_path())
                        .map(|m| m.len())
                        .unwrap_or(0);

                    // Create destination directory if needed
                    if let Some(parent) = dst_path.parent() {
                        if let Err(e) = fs::create_dir_all(parent.as_std_path()) {
                            ctx.warn(format!("Failed to create quarantine dir: {}", e));
                            continue;
                        }
                    }

                    // Move the file
                    if let Err(e) = fs::rename(src_path.as_std_path(), dst_path.as_std_path()) {
                        ctx.warn(format!("Failed to quarantine {}: {}", cid_str, e));
                        continue;
                    }

                    // Only objects that were actually moved are counted.
                    quarantined += 1;
                    quarantined_bytes += size;
                }

                if !ctx.use_json() {
                    ctx.info(format!(
                        "Quarantined {} objects ({} bytes) to {}",
                        quarantined, quarantined_bytes, gc_dir
                    ));
                    // Print the real quarantine location rather than a
                    // hard-coded relative path, which would be wrong when the
                    // command is run from a subdirectory of the repository.
                    ctx.info(format!("To reclaim disk space: rm -rf {}", gc_dir));
                }
            } else if !ctx.use_json() {
                ctx.info("No unreferenced objects found.");
            }
        }

        Ok(GcOutput {
            quarantined,
            quarantined_bytes,
            objects: unreferenced_cids,
            dry_run: args.dry_run,
            quarantine_dir: gc_dir.to_string(),
        })
    })
}