// void-cli 0.0.4
//
// CLI for void — anonymous encrypted source control
//! Machine-readable repository audit command.
//!
//! Outputs JSON categorization of all objects in the repository.
//! This enables agents and scripts to audit repositories programmatically.

use std::collections::HashMap;
use std::fs;
use std::path::Path;

use serde::Serialize;
use void_core::cid as void_cid;
use void_core::metadata::MetadataBundle;
use void_core::ops::traversal::{walk_all_refs, WalkedCommit};
use void_core::crypto::{EncryptedCommit, EncryptedMetadata, EncryptedShard};
use void_core::store::{FsStore, ObjectStoreExt};
use void_core::support::ToVoidCid;

use crate::context::{open_repo, void_err_to_cli};
use crate::output::{run_command, CliError, CliOptions};

/// Command-line arguments for the `audit` command.
///
/// Consumed by [`run`], which forwards `max_commits` to the history walker
/// as its commit limit.
#[derive(Debug)]
pub struct AuditArgs {
    /// Maximum commits to traverse (default: 10000)
    ///
    /// NOTE(review): the default is not applied anywhere in this file —
    /// presumably whatever builds `AuditArgs` supplies it; confirm there.
    pub max_commits: usize,
}

/// JSON output for the audit command.
///
/// Serialized with camelCase keys. Built by [`run`] after it has scanned
/// every file under the repository's `objects` directory.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AuditOutput {
    /// Total objects in repository (always equal to `objects.len()`)
    pub total_objects: usize,
    /// Breakdown by type
    pub by_type: ObjectTypeBreakdown,
    /// Breakdown by format (VD01 vs legacy)
    ///
    /// Objects whose format is reported as `"unknown"` are counted in
    /// neither bucket, so the two counters may sum to less than
    /// `total_objects`.
    pub by_format: FormatBreakdown,
    /// Total bytes: sum of the on-disk file sizes of all listed objects.
    /// A file whose metadata cannot be read contributes 0.
    pub total_bytes: u64,
    /// List of all objects with details, sorted by object type and then CID
    pub objects: Vec<ObjectInfo>,
}

/// Breakdown by object type.
///
/// Each counter is the number of on-disk objects that [`run`] classified
/// with the corresponding type. All counters start at zero via `Default`.
#[derive(Debug, Serialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct ObjectTypeBreakdown {
    /// Objects indexed as commits during the history walk.
    pub commits: usize,
    /// Objects indexed as commit metadata during the history walk.
    pub metadata: usize,
    /// Objects indexed as shards referenced by a walked commit's metadata.
    pub shards: usize,
    /// Objects present on disk but not found in the index built from the
    /// history walk.
    pub unknown: usize,
}

/// Breakdown by encryption format.
///
/// Formats are the strings produced by `detect_format`. Objects whose
/// format could not be determined (`"unknown"`) are not counted here.
#[derive(Debug, Serialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct FormatBreakdown {
    /// Objects whose bytes start with the `VD01` marker followed by at
    /// least one more byte.
    pub vd01: usize,
    /// Objects that were readable but did not match the `VD01` check.
    pub legacy: usize,
}

/// Information about a single object.
///
/// One entry is produced per file found under the `objects` directory
/// (files ending in `.tmp` are skipped by [`run`]). Serialized with
/// camelCase keys.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ObjectInfo {
    /// The object's file name on disk, which `run` uses as its CID string.
    pub cid: String,
    /// One of `"commit"`, `"metadata"`, `"shard"` or `"unknown"`.
    pub object_type: String,
    /// One of `"vd01"`, `"legacy"` or `"unknown"`.
    pub format: String,
    /// File size in bytes; 0 if the file's metadata could not be read.
    pub size: u64,
    /// Parent commit CID (for metadata/shards)
    ///
    /// Omitted from the JSON when `None`. When several walked commits
    /// reference the same object, the one indexed last wins.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parent_commit: Option<String>,
    /// Error message if object couldn't be fully audited
    ///
    /// Omitted from the JSON when `None`. `run` currently sets it only for
    /// objects of type `"unknown"`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}

/// Lookup table from an object's CID string to what the history walk
/// learned about it.
struct ObjectIndex {
    /// Keyed by CID string; the value is `(type, parent_commit, format)`.
    objects: HashMap<String, (String, Option<String>, String)>,
}

impl ObjectIndex {
    /// Create an index with no entries.
    fn new() -> Self {
        let objects = HashMap::new();
        Self { objects }
    }

    /// Record (or overwrite) the entry for `cid`.
    ///
    /// A later insert for the same CID replaces the earlier one.
    fn insert(&mut self, cid: String, obj_type: &str, parent: Option<String>, format: &str) {
        let entry = (String::from(obj_type), parent, String::from(format));
        self.objects.insert(cid, entry);
    }

    /// Return the `(type, parent_commit, format)` entry for `cid`, if any.
    fn get(&self, cid: &str) -> Option<&(String, Option<String>, String)> {
        let Self { objects } = self;
        objects.get(cid)
    }
}

/// Classify encrypted bytes as `"vd01"` or `"legacy"`.
///
/// The input counts as `"vd01"` only when it begins with the ASCII marker
/// `VD01` and has at least one further byte after it; every other input,
/// including the bare 4-byte marker and the empty slice, is `"legacy"`.
fn detect_format(encrypted: &[u8]) -> &'static str {
    match encrypted {
        // Marker plus at least one byte following it.
        [b'V', b'D', b'0', b'1', _, ..] => "vd01",
        _ => "legacy",
    }
}

/// Run the audit command.
pub fn run(cwd: &Path, args: AuditArgs, opts: &CliOptions) -> Result<(), CliError> {
    run_command("audit", opts, |ctx| {
        ctx.progress("Auditing repository objects...");

        let repo = open_repo(cwd)?;

        let void_dir = repo.void_dir().to_owned();
        let objects_dir = void_dir.join("objects");

        let store = FsStore::new(objects_dir.clone()).map_err(void_err_to_cli)?;

        // Build index by walking commit history
        ctx.progress("Walking commit history...");
        let mut index = ObjectIndex::new();

        // Walk all commits
        let walker = walk_all_refs(&store, repo.vault(), &void_dir, Some(args.max_commits))
            .map_err(void_err_to_cli)?;

        let mut commits_walked = 0;
        for result in walker {
            let walked: WalkedCommit = match result {
                Ok(w) => w,
                Err(e) => {
                    ctx.warn(format!("Error walking commit: {}", e));
                    continue;
                }
            };

            commits_walked += 1;
            if commits_walked % 100 == 0 {
                ctx.progress(format!("Walked {} commits...", commits_walked));
            }

            // Get commit format
            let commit_encrypted: EncryptedCommit = match store.get_blob(&walked.cid) {
                Ok(data) => data,
                Err(_) => continue,
            };
            let commit_format = detect_format(commit_encrypted.as_bytes());

            // Index commit
            index.insert(walked.cid_str.clone(), "commit", None, commit_format);

            // Get metadata CID
            let metadata_cid = match walked.commit.metadata_bundle.to_void_cid() {
                Ok(c) => c,
                Err(_) => continue,
            };
            let metadata_cid_str = metadata_cid.to_string();

            // Load metadata
            let metadata_encrypted: EncryptedMetadata = match store.get_blob(&metadata_cid) {
                Ok(data) => data,
                Err(_) => continue,
            };
            let metadata_format = detect_format(metadata_encrypted.as_bytes());

            // Index metadata
            index.insert(
                metadata_cid_str.clone(),
                "metadata",
                Some(walked.cid_str.clone()),
                metadata_format,
            );

            // Decrypt and index shards
            let bundle: MetadataBundle =
                match walked.reader.decrypt_metadata::<MetadataBundle>(&metadata_encrypted) {
                    Ok(b) => b,
                    Err(_) => continue,
                };

            for range in &bundle.shard_map.ranges {
                if let Some(ref shard_cid_typed) = range.cid {
                    if let Ok(shard_cid) = void_cid::from_bytes(shard_cid_typed.as_bytes()) {
                        let shard_cid_str = shard_cid.to_string();

                        // Get shard format
                        let shard_format = match store.get_blob::<EncryptedShard>(&shard_cid) {
                            Ok(data) => detect_format(data.as_bytes()),
                            Err(_) => "unknown",
                        };

                        index.insert(
                            shard_cid_str,
                            "shard",
                            Some(walked.cid_str.clone()),
                            shard_format,
                        );
                    }
                }
            }
        }

        ctx.progress(format!(
            "Walked {} commits, scanning objects...",
            commits_walked
        ));

        // Now scan all objects and categorize
        let mut objects = Vec::new();
        let mut by_type = ObjectTypeBreakdown::default();
        let mut by_format = FormatBreakdown::default();
        let mut total_bytes: u64 = 0;

        // Read all objects from disk
        let Ok(prefixes) = fs::read_dir(&objects_dir) else {
            return Err(CliError::internal(
                "cannot read objects directory".to_string(),
            ));
        };

        for prefix_entry in prefixes.flatten() {
            let prefix_path = prefix_entry.path();
            if !prefix_path.is_dir() {
                continue;
            }

            let Ok(files) = fs::read_dir(&prefix_path) else {
                continue;
            };

            for file_entry in files.flatten() {
                let file_name = match file_entry.file_name().to_str() {
                    Some(n) => n.to_string(),
                    None => continue,
                };

                // Skip temp files
                if file_name.ends_with(".tmp") {
                    continue;
                }

                let cid_str = file_name;

                // Get file size
                let size = fs::metadata(file_entry.path())
                    .map(|m| m.len())
                    .unwrap_or(0);
                total_bytes += size;

                // Look up in index
                let (obj_type, parent_commit, format) = if let Some((t, p, f)) = index.get(&cid_str)
                {
                    (t.as_str(), p.clone(), f.as_str())
                } else {
                    // Object not in index - read it to determine format at least
                    let format = if let Ok(data) = fs::read(file_entry.path()) {
                        detect_format(&data)
                    } else {
                        "unknown"
                    };
                    ("unknown", None, format)
                };

                // Update counters
                match obj_type {
                    "commit" => by_type.commits += 1,
                    "metadata" => by_type.metadata += 1,
                    "shard" => by_type.shards += 1,
                    _ => by_type.unknown += 1,
                }

                match format {
                    "vd01" => by_format.vd01 += 1,
                    "legacy" => by_format.legacy += 1,
                    _ => {}
                }

                objects.push(ObjectInfo {
                    cid: cid_str,
                    object_type: obj_type.to_string(),
                    format: format.to_string(),
                    size,
                    parent_commit,
                    error: if obj_type == "unknown" {
                        Some("Object not found in commit history - may be orphaned".to_string())
                    } else {
                        None
                    },
                });
            }
        }

        // Sort objects by type, then CID
        objects.sort_by(|a, b| {
            a.object_type
                .cmp(&b.object_type)
                .then_with(|| a.cid.cmp(&b.cid))
        });

        ctx.progress(format!(
            "Audit complete: {} objects ({} commits, {} metadata, {} shards, {} unknown)",
            objects.len(),
            by_type.commits,
            by_type.metadata,
            by_type.shards,
            by_type.unknown
        ));

        Ok(AuditOutput {
            total_objects: objects.len(),
            by_type,
            by_format,
            total_bytes,
            objects,
        })
    })
}