use std::collections::HashMap;
use std::fs;
use std::path::Path;
use serde::Serialize;
use void_core::cid as void_cid;
use void_core::metadata::MetadataBundle;
use void_core::ops::traversal::{walk_all_refs, WalkedCommit};
use void_core::crypto::{EncryptedCommit, EncryptedMetadata, EncryptedShard};
use void_core::store::{FsStore, ObjectStoreExt};
use void_core::support::ToVoidCid;
use crate::context::{open_repo, void_err_to_cli};
use crate::output::{run_command, CliError, CliOptions};
/// Arguments for the `audit` command.
#[derive(Debug)]
pub struct AuditArgs {
    /// Upper bound on how many commits the history walk will visit.
    pub max_commits: usize,
}
/// Serialized result of an audit run.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AuditOutput {
    /// Total number of object files found in the store.
    pub total_objects: usize,
    /// Counts grouped by object type (commit / metadata / shard / unknown).
    pub by_type: ObjectTypeBreakdown,
    /// Counts grouped by on-disk encryption format.
    pub by_format: FormatBreakdown,
    /// Combined size in bytes of all audited object files.
    pub total_bytes: u64,
    /// Per-object details, sorted by object type and then by CID.
    pub objects: Vec<ObjectInfo>,
}
/// Object counts grouped by the type learned from the history walk.
#[derive(Debug, Serialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct ObjectTypeBreakdown {
    /// Commit objects reachable from refs.
    pub commits: usize,
    /// Metadata-bundle objects referenced by commits.
    pub metadata: usize,
    /// Shard objects referenced by metadata bundles.
    pub shards: usize,
    /// Objects not reachable from any walked commit (possible orphans).
    pub unknown: usize,
}
/// Object counts grouped by on-disk encryption format.
/// Objects whose format could not be determined are counted in neither field.
#[derive(Debug, Serialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct FormatBreakdown {
    /// Blobs carrying the `VD01` magic prefix (current format).
    pub vd01: usize,
    /// Blobs without the magic prefix (pre-`VD01` format).
    pub legacy: usize,
}
/// Details reported for a single object file in the store.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ObjectInfo {
    /// The object's content identifier as a string.
    pub cid: String,
    /// One of "commit", "metadata", "shard", or "unknown".
    pub object_type: String,
    /// One of "vd01", "legacy", or "unknown".
    pub format: String,
    /// File size in bytes (0 when the file's metadata could not be read).
    pub size: u64,
    /// CID of the owning commit, when the object was reached via the walk.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parent_commit: Option<String>,
    /// Diagnostic message; currently only set for objects absent from history.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}
/// In-memory lookup table built while walking history, mapping a CID string
/// to what was learned about the object: `(object_type, parent_commit_cid, format)`.
struct ObjectIndex {
    objects: HashMap<String, (String, Option<String>, String)>,
}
impl ObjectIndex {
    /// Creates an empty index.
    fn new() -> Self {
        ObjectIndex {
            objects: HashMap::default(),
        }
    }
    /// Records an object under `cid`, replacing any previous entry for it.
    fn insert(&mut self, cid: String, obj_type: &str, parent: Option<String>, format: &str) {
        let entry = (obj_type.to_owned(), parent, format.to_owned());
        self.objects.insert(cid, entry);
    }
    /// Looks up a previously recorded object by its CID string.
    fn get(&self, cid: &str) -> Option<&(String, Option<String>, String)> {
        self.objects.get(cid)
    }
}
/// Classifies an encrypted blob's on-disk format from its leading bytes.
///
/// A blob counts as "vd01" only when it starts with the `VD01` magic AND
/// carries at least one payload byte after it (`len() > 4`); everything
/// else — including a bare 4-byte magic — is reported as "legacy".
fn detect_format(encrypted: &[u8]) -> &'static str {
    let is_vd01 = encrypted.starts_with(b"VD01") && encrypted.len() > 4;
    if is_vd01 {
        "vd01"
    } else {
        "legacy"
    }
}
/// Audits every object in the repository's object store.
///
/// Runs in two phases: first the commit history reachable from all refs is
/// walked (bounded by `args.max_commits`) to learn each object's type,
/// owning commit, and encryption format; then the on-disk `objects`
/// directory is scanned so that objects absent from history (possible
/// orphans) are still counted and reported. Per-object read/decrypt
/// failures are skipped rather than aborting the whole audit.
pub fn run(cwd: &Path, args: AuditArgs, opts: &CliOptions) -> Result<(), CliError> {
    run_command("audit", opts, |ctx| {
        ctx.progress("Auditing repository objects...");
        let repo = open_repo(cwd)?;
        let void_dir = repo.void_dir().to_owned();
        let objects_dir = void_dir.join("objects");
        let store = FsStore::new(objects_dir.clone()).map_err(void_err_to_cli)?;

        // Phase 1: walk commit history and index every object we can reach.
        ctx.progress("Walking commit history...");
        let mut index = ObjectIndex::new();
        let walker = walk_all_refs(&store, repo.vault(), &void_dir, Some(args.max_commits))
            .map_err(void_err_to_cli)?;
        let mut commits_walked = 0;
        for result in walker {
            let walked: WalkedCommit = match result {
                Ok(w) => w,
                Err(e) => {
                    // Best-effort: a broken commit is warned about, not fatal.
                    ctx.warn(format!("Error walking commit: {}", e));
                    continue;
                }
            };
            commits_walked += 1;
            if commits_walked % 100 == 0 {
                ctx.progress(format!("Walked {} commits...", commits_walked));
            }
            // Index the commit object itself; if its blob is missing, skip the
            // whole commit (its metadata/shards stay unindexed → "unknown").
            let commit_encrypted: EncryptedCommit = match store.get_blob(&walked.cid) {
                Ok(data) => data,
                Err(_) => continue,
            };
            let commit_format = detect_format(commit_encrypted.as_bytes());
            index.insert(walked.cid_str.clone(), "commit", None, commit_format);
            // Index the commit's metadata bundle, with the commit as parent.
            let metadata_cid = match walked.commit.metadata_bundle.to_void_cid() {
                Ok(c) => c,
                Err(_) => continue,
            };
            let metadata_cid_str = metadata_cid.to_string();
            let metadata_encrypted: EncryptedMetadata = match store.get_blob(&metadata_cid) {
                Ok(data) => data,
                Err(_) => continue,
            };
            let metadata_format = detect_format(metadata_encrypted.as_bytes());
            index.insert(
                metadata_cid_str.clone(),
                "metadata",
                Some(walked.cid_str.clone()),
                metadata_format,
            );
            // Decrypt the bundle to discover the shards it references.
            let bundle: MetadataBundle =
                match walked.reader.decrypt_metadata::<MetadataBundle>(&metadata_encrypted) {
                    Ok(b) => b,
                    Err(_) => continue,
                };
            for range in &bundle.shard_map.ranges {
                if let Some(ref shard_cid_typed) = range.cid {
                    if let Ok(shard_cid) = void_cid::from_bytes(shard_cid_typed.as_bytes()) {
                        let shard_cid_str = shard_cid.to_string();
                        // A shard whose blob is unreadable is still indexed,
                        // just with format "unknown".
                        let shard_format = match store.get_blob::<EncryptedShard>(&shard_cid) {
                            Ok(data) => detect_format(data.as_bytes()),
                            Err(_) => "unknown",
                        };
                        index.insert(
                            shard_cid_str,
                            "shard",
                            Some(walked.cid_str.clone()),
                            shard_format,
                        );
                    }
                }
            }
        }
        ctx.progress(format!(
            "Walked {} commits, scanning objects...",
            commits_walked
        ));

        // Phase 2: scan the two-level object store layout (prefix directories
        // containing object files) and cross-reference each file against the
        // history index built above.
        let mut objects = Vec::new();
        let mut by_type = ObjectTypeBreakdown::default();
        let mut by_format = FormatBreakdown::default();
        let mut total_bytes: u64 = 0;
        let Ok(prefixes) = fs::read_dir(&objects_dir) else {
            return Err(CliError::internal(
                "cannot read objects directory".to_string(),
            ));
        };
        for prefix_entry in prefixes.flatten() {
            let prefix_path = prefix_entry.path();
            if !prefix_path.is_dir() {
                continue;
            }
            let Ok(files) = fs::read_dir(&prefix_path) else {
                continue;
            };
            for file_entry in files.flatten() {
                let file_name = match file_entry.file_name().to_str() {
                    Some(n) => n.to_string(),
                    None => continue,
                };
                // Skip in-progress temp files (they are neither sized nor listed).
                if file_name.ends_with(".tmp") {
                    continue;
                }
                // NOTE(review): assumes the file name is the full CID string used
                // as the index key — confirm against FsStore's on-disk layout.
                let cid_str = file_name;
                let size = fs::metadata(file_entry.path())
                    .map(|m| m.len())
                    .unwrap_or(0);
                total_bytes += size;
                // Objects never seen during the walk are potential orphans;
                // read their raw bytes so at least the format can be reported.
                let (obj_type, parent_commit, format) = if let Some((t, p, f)) = index.get(&cid_str)
                {
                    (t.as_str(), p.clone(), f.as_str())
                } else {
                    let format = if let Ok(data) = fs::read(file_entry.path()) {
                        detect_format(&data)
                    } else {
                        "unknown"
                    };
                    ("unknown", None, format)
                };
                match obj_type {
                    "commit" => by_type.commits += 1,
                    "metadata" => by_type.metadata += 1,
                    "shard" => by_type.shards += 1,
                    _ => by_type.unknown += 1,
                }
                match format {
                    "vd01" => by_format.vd01 += 1,
                    "legacy" => by_format.legacy += 1,
                    // "unknown" format is intentionally not counted either way.
                    _ => {}
                }
                objects.push(ObjectInfo {
                    cid: cid_str,
                    object_type: obj_type.to_string(),
                    format: format.to_string(),
                    size,
                    parent_commit,
                    error: if obj_type == "unknown" {
                        Some("Object not found in commit history - may be orphaned".to_string())
                    } else {
                        None
                    },
                });
            }
        }
        // Deterministic report order: group by object type, then by CID.
        objects.sort_by(|a, b| {
            a.object_type
                .cmp(&b.object_type)
                .then_with(|| a.cid.cmp(&b.cid))
        });
        ctx.progress(format!(
            "Audit complete: {} objects ({} commits, {} metadata, {} shards, {} unknown)",
            objects.len(),
            by_type.commits,
            by_type.metadata,
            by_type.shards,
            by_type.unknown
        ));
        Ok(AuditOutput {
            total_objects: objects.len(),
            by_type,
            by_format,
            total_bytes,
            objects,
        })
    })
}