use std::collections::VecDeque;
use std::fs;
use thiserror::Error;
use void_core::{
cid as void_cid,
cid::VoidCid,
collab::WrappedKey,
crypto::{
reader::collect_ancestor_content_keys_vault,
EncryptedShard,
},
metadata::{Commit, MetadataBundle},
support::ToVoidCid,
store::{FsStore, ObjectStoreExt},
VoidContext,
};
/// Error type for this module's fallible operations.
///
/// Both variants carry a `#[from]` conversion, so `?` can be applied directly
/// to values of the wrapped error types.
#[derive(Debug, Error)]
pub enum VoidBackendError {
/// Wraps a `std::io::Error`.
#[error("io error: {0}")]
Io(#[from] std::io::Error),
/// Wraps a `void_core::VoidError`.
#[error("void error: {0}")]
Void(#[from] void_core::VoidError),
}
/// Shorthand for `std::result::Result` with `VoidBackendError` as the error type.
pub type Result<T> = std::result::Result<T, VoidBackendError>;
/// Kind of a stored object, as classified through the commit-history index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectType {
    /// A commit object.
    Commit,
    /// A metadata bundle referenced by a commit.
    Metadata,
    /// A manifest referenced by a commit.
    Manifest,
    /// A repo manifest referenced by a commit.
    RepoManifest,
    /// A shard referenced from a commit's shard map.
    Shard,
    /// Anything that could not be classified.
    Unknown,
}

impl ObjectType {
    /// Returns the lowercase, hyphen-separated label for this object type.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Commit => "commit",
            Self::Metadata => "metadata",
            Self::Manifest => "manifest",
            Self::RepoManifest => "repo-manifest",
            Self::Shard => "shard",
            Self::Unknown => "unknown",
        }
    }
}
/// Format tag attached to a stored object.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Labelled `commit/v1`.
    CommitV1,
    /// Labelled `shard/v1`.
    ShardV1,
    /// Labelled `metadata/v1`.
    MetadataV1,
    /// Labelled `manifest/v1`.
    ManifestV1,
    /// Labelled `repo-manifest/v1`.
    RepoManifestV1,
    /// No format could be determined.
    Unknown,
}

impl Format {
    /// Returns the `kind/version` label for this format, or `"unknown"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::CommitV1 => "commit/v1",
            Self::ShardV1 => "shard/v1",
            Self::MetadataV1 => "metadata/v1",
            Self::ManifestV1 => "manifest/v1",
            Self::RepoManifestV1 => "repo-manifest/v1",
            Self::Unknown => "unknown",
        }
    }

    /// Returns `true` for every variant except [`Format::Unknown`].
    pub fn is_known(&self) -> bool {
        *self != Self::Unknown
    }
}
/// Summary of one stored object, as produced by `categorize_object`.
#[derive(Debug, Clone)]
pub struct ObjectInfo {
    /// The CID string exactly as it was supplied; it is stored even when it
    /// fails to parse, so it is not guaranteed to be a valid (or ASCII) CID.
    pub cid: String,
    /// Classification obtained from the commit-history index.
    pub object_type: ObjectType,
    /// Format tag for the object.
    pub format: Format,
    /// Byte length of the stored blob, or `0` when it could not be read.
    pub encrypted_size: usize,
}

impl ObjectInfo {
    /// Returns at most the first 12 bytes of the CID for compact display.
    ///
    /// The cut never lands inside a multi-byte UTF-8 character: if byte 12 is
    /// not a character boundary the prefix is shortened to the previous
    /// boundary instead of panicking. For ASCII CIDs the result is simply the
    /// first 12 characters (or the whole string when it is shorter).
    pub fn short_cid(&self) -> &str {
        let mut end = self.cid.len().min(12);
        // `is_char_boundary(0)` is always true, so this loop terminates.
        while !self.cid.is_char_boundary(end) {
            end -= 1;
        }
        &self.cid[..end]
    }
}
/// Audit view of a commit, as filled in by `audit_commit`.
#[derive(Debug, Clone)]
pub struct CommitAudit {
/// Copy of the commit's message.
pub message: String,
/// The commit's timestamp value, copied as-is.
pub timestamp: u64,
/// CID string of the commit's first parent; `None` when there is no parent
/// or the parent reference cannot be converted to a CID.
pub parent_cid: Option<String>,
/// CID string of the commit's metadata bundle, or the hex encoding of the
/// raw reference bytes when CID conversion fails.
pub metadata_cid: String,
/// Result of the commit's `is_signed()` check.
pub is_signed: bool,
/// Hex form of the commit's author, when one is present.
pub author: Option<String>,
}
/// Audit view of a metadata bundle, as filled in by `audit_metadata_indexed`.
#[derive(Debug, Clone)]
pub struct MetadataAudit {
/// The bundle's `version` field.
pub version: u32,
/// Number of ranges in the bundle's shard map.
pub range_count: usize,
/// One entry per shard-map range that carries a CID.
pub shards: Vec<ShardRef>,
/// Summary of the commit through which this bundle was indexed.
pub parent_commit: Option<ParentCommitInfo>,
}
/// Reference to a shard taken from a metadata bundle's shard map.
#[derive(Debug, Clone)]
pub struct ShardRef {
/// The range's `shard_id`, narrowed to `u32`.
pub shard_id: u32,
/// CID string of the shard, or the hex encoding of the raw reference bytes
/// when CID conversion fails.
pub cid: String,
}
/// Audit view of a shard, as filled in by `audit_shard_idxed`.
#[derive(Debug, Clone)]
pub struct ShardAudit {
/// Currently always set to `0` by `audit_shard_idxed`.
pub version: u32,
/// Number of items in `entries`, narrowed to `u32`.
pub entry_count: u32,
/// Currently always set to `0` by `audit_shard_idxed`.
pub dir_count: u32,
/// Compressed size recorded for this shard in the manifest, narrowed to
/// `u32`; `0` when the manifest does not list the shard.
pub body_compressed: u32,
/// Length of the decompressed shard body, narrowed to `u32`.
pub body_decompressed: u32,
/// Manifest entries grouped under this shard.
pub entries: Vec<ShardEntry>,
/// Summary of the commit through which this shard was indexed.
pub parent_commit: Option<ParentCommitInfo>,
}
/// One manifest entry that belongs to an audited shard.
#[derive(Debug, Clone)]
pub struct ShardEntry {
/// Copy of the entry's `path`.
pub path: String,
/// The entry's `length` field.
pub size: u64,
/// The entry's `lines` field.
pub lines: u32,
}
/// Short description of the commit that references an audited object;
/// built from a `CommitIndex` by `parent_info`.
#[derive(Debug, Clone)]
pub struct ParentCommitInfo {
/// CID string of the commit.
pub cid: String,
/// Copy of the commit's message.
pub message: String,
/// The commit's timestamp value, copied as-is.
pub timestamp: u64,
}
/// One file entry copied out of a manifest by `audit_manifest_indexed`.
#[derive(Debug, Clone)]
pub struct ManifestFileEntry {
/// Copy of the entry's `path`.
pub path: String,
/// The entry's `size` field.
pub size: u64,
/// The entry's `lines` field.
pub lines: u32,
/// The entry's `shard_index` field.
pub shard_index: u32,
}
/// Per-shard information copied out of a manifest by `audit_manifest_indexed`.
#[derive(Debug, Clone)]
pub struct ManifestShardInfo {
/// CID string of the shard, or the hex encoding of the raw reference bytes
/// when CID conversion fails.
pub cid: String,
/// The shard record's `size_compressed` field.
pub size_compressed: u64,
/// The shard record's `size_decompressed` field.
pub size_decompressed: u64,
/// Number of manifest entries grouped under this shard; `0` when no group
/// is available for the shard's position.
pub file_count: usize,
}
/// Audit view of a manifest, as filled in by `audit_manifest_indexed`.
#[derive(Debug, Clone)]
pub struct ManifestAudit {
/// Value reported by the manifest's `total_files()`.
pub file_count: u64,
/// Value reported by the manifest's `total_bytes()`.
pub total_bytes: u64,
/// Number of shard records in the manifest.
pub shard_count: usize,
/// The manifest entries that could be read; unreadable entries are omitted.
pub files: Vec<ManifestFileEntry>,
/// One item per shard record in the manifest, in manifest order.
pub shards: Vec<ManifestShardInfo>,
/// Summary of the commit through which this manifest was indexed.
pub parent_commit: Option<ParentCommitInfo>,
}
/// Audit view of a repo manifest; only its stored size is reported.
#[derive(Debug, Clone)]
pub struct RepoManifestAudit {
/// Byte length of the stored blob, or `0` when it could not be read.
pub encrypted_size: usize,
/// Summary of the commit through which this repo manifest was indexed.
pub parent_commit: Option<ParentCommitInfo>,
}
/// Outcome of auditing a single object with `audit_object_indexed`.
#[derive(Debug, Clone)]
pub enum AuditResult {
/// The object is a commit.
Commit(CommitAudit),
/// The object is a metadata bundle.
Metadata(MetadataAudit),
/// The object is a manifest.
Manifest(ManifestAudit),
/// The object is a repo manifest.
RepoManifest(RepoManifestAudit),
/// The object is a shard.
Shard(ShardAudit),
/// The audit failed; the payload is a human-readable explanation.
Error(String),
}
/// Per-commit record stored in `ObjectIndex`; built by `build_index`.
#[derive(Debug, Clone)]
pub struct CommitIndex {
/// CID string of the commit.
pub commit_cid: String,
/// Copy of the commit's message.
pub message: String,
/// The commit's timestamp value, copied as-is.
pub timestamp: u64,
/// CID string of the commit's metadata bundle; empty when the reference
/// cannot be converted to a CID.
pub metadata_cid: String,
/// For every shard-map range that carries a convertible CID: the shard's
/// CID string and the range's wrapped key, if it has one.
pub shard_cids: Vec<(String, Option<WrappedKey>)>,
}
/// Result of `ObjectIndex::lookup`: what a CID is, plus a borrow of the
/// commit record that references it where applicable.
pub enum IndexedObject<'a> {
/// The CID is one of the indexed commits.
Commit,
/// The CID is the metadata bundle of the given commit.
Metadata(&'a CommitIndex),
/// The CID is the manifest of the given commit.
Manifest(&'a CommitIndex),
/// The CID is the repo manifest of the given commit.
RepoManifest(&'a CommitIndex),
/// The CID is a shard of the given commit, with the wrapped key recorded
/// for it, if any.
Shard(&'a CommitIndex, Option<&'a WrappedKey>),
/// The CID does not appear in the index.
Unknown,
}
/// Reverse index from object CID strings to the commit that references them.
///
/// Populated by `build_index`; every key is a CID in string form.
#[derive(Debug, Default)]
pub struct ObjectIndex {
    /// Metadata-bundle CID -> referencing commit.
    pub metadata_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
    /// Manifest CID -> referencing commit.
    pub manifest_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
    /// Repo-manifest CID -> referencing commit.
    pub repo_manifest_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
    /// Shard CID -> referencing commit and the shard's wrapped key, if any.
    pub shard_to_commit: rustc_hash::FxHashMap<String, (CommitIndex, Option<WrappedKey>)>,
    /// CIDs of the commits that were visited while building the index.
    pub commit_cids: rustc_hash::FxHashSet<String>,
}

impl ObjectIndex {
    /// Classifies `cid_str` against the index.
    ///
    /// The tables are consulted in a fixed order (commits, metadata,
    /// manifests, repo manifests, shards) and the first hit wins;
    /// [`IndexedObject::Unknown`] is returned when none contains the CID.
    pub fn lookup(&self, cid_str: &str) -> IndexedObject<'_> {
        if self.commit_cids.contains(cid_str) {
            IndexedObject::Commit
        } else if let Some(owner) = self.metadata_to_commit.get(cid_str) {
            IndexedObject::Metadata(owner)
        } else if let Some(owner) = self.manifest_to_commit.get(cid_str) {
            IndexedObject::Manifest(owner)
        } else if let Some(owner) = self.repo_manifest_to_commit.get(cid_str) {
            IndexedObject::RepoManifest(owner)
        } else if let Some((owner, key)) = self.shard_to_commit.get(cid_str) {
            IndexedObject::Shard(owner, key.as_ref())
        } else {
            IndexedObject::Unknown
        }
    }
}
/// Builds an [`ObjectIndex`] by walking commit history breadth-first.
///
/// The walk starts from HEAD (when it resolves) and from every readable file
/// directly under `refs/heads`, then follows parent links. For each commit it
/// records the commit CID and, when the commit's metadata loads, maps the
/// metadata bundle, manifest, repo manifest and shard CIDs back to that commit.
///
/// * `max_commits` caps how many distinct commits are visited; the walk stops
///   as soon as the cap is reached.
/// * Unreadable refs, unparsable CIDs and commits that fail to load are
///   skipped rather than reported. When only the metadata fails to load, the
///   commit's parents are still followed.
/// * A commit whose metadata reference cannot be converted to a CID gets no
///   entry in `metadata_to_commit` (its `CommitIndex::metadata_cid` is empty).
///
/// Every shard entry stores its own clone of the commit's `CommitIndex`.
///
/// The current implementation never returns `Err`.
pub fn build_index(ctx: &VoidContext, store: &FsStore, max_commits: usize) -> Result<ObjectIndex> {
    let mut index = ObjectIndex::default();
    let mut queue: VecDeque<VoidCid> = VecDeque::new();

    // Seed the walk with HEAD ...
    if let Ok(Some(head_cid)) = ctx.resolve_head() {
        if let Ok(cid) = void_cid::from_bytes(head_cid.as_bytes()) {
            queue.push_back(cid);
        }
    }

    // ... and with every branch ref. `read_dir` already fails for a missing
    // directory, so no separate existence check is needed.
    let refs_dir = ctx.paths.void_dir.join("refs/heads");
    if let Ok(entries) = fs::read_dir(&refs_dir) {
        for entry in entries.flatten() {
            if let Ok(content) = fs::read_to_string(entry.path()) {
                if let Ok(branch_cid) = void_cid::parse(content.trim()) {
                    queue.push_back(branch_cid);
                }
            }
        }
    }

    while let Some(commit_cid) = queue.pop_front() {
        // Once the cap is hit nothing further can be indexed, so stop instead
        // of draining the rest of the queue.
        if index.commit_cids.len() >= max_commits {
            break;
        }

        // `commit_cids` doubles as the visited set: `insert` returns `false`
        // for a commit that was already handled.
        let commit_cid_str = commit_cid.to_string();
        if !index.commit_cids.insert(commit_cid_str.clone()) {
            continue;
        }

        let (commit, bundle, _reader) = match ctx.load_commit_with_metadata(store, &commit_cid) {
            Ok(r) => r,
            Err(_) => {
                // Metadata is unavailable; still follow the parents if the
                // commit itself can be loaded.
                if let Ok((commit, _)) = ctx.load_commit(store, &commit_cid) {
                    queue_parents(&commit, &mut queue);
                }
                continue;
            }
        };

        let metadata_cid = commit
            .metadata_bundle
            .to_void_cid()
            .map(|c| c.to_string())
            .ok();

        let mut shard_cids = Vec::new();
        for range in &bundle.shard_map.ranges {
            if let Some(ref shard_cid_typed) = range.cid {
                if let Ok(shard_cid) = void_cid::from_bytes(shard_cid_typed.as_bytes()) {
                    shard_cids.push((shard_cid.to_string(), range.wrapped_key.clone()));
                }
            }
        }

        let commit_index = CommitIndex {
            commit_cid: commit_cid_str,
            message: commit.message.clone(),
            timestamp: commit.timestamp,
            metadata_cid: metadata_cid.clone().unwrap_or_default(),
            shard_cids,
        };

        // Only index the metadata bundle under a real CID; an empty key would
        // make unrelated commits overwrite each other.
        if let Some(metadata_key) = metadata_cid {
            index.metadata_to_commit.insert(metadata_key, commit_index.clone());
        }

        for (shard_cid_str, wrapped_key) in &commit_index.shard_cids {
            index
                .shard_to_commit
                .insert(shard_cid_str.clone(), (commit_index.clone(), wrapped_key.clone()));
        }

        if let Some(ref manifest_cid_bytes) = commit.manifest_cid {
            if let Ok(manifest_cid) = manifest_cid_bytes.to_void_cid() {
                index.manifest_to_commit.insert(manifest_cid.to_string(), commit_index.clone());
            }
        }

        if let Some(ref rm_cid_bytes) = commit.repo_manifest_cid {
            if let Ok(rm_cid) = rm_cid_bytes.to_void_cid() {
                index.repo_manifest_to_commit.insert(rm_cid.to_string(), commit_index.clone());
            }
        }

        queue_parents(&commit, &mut queue);
    }

    Ok(index)
}
/// Lists the names of all files stored two levels below the `objects`
/// directory, sorted ascending.
///
/// Only directories directly under `objects` are descended into. Names ending
/// in `.tmp` and names that are not valid UTF-8 are left out. Any directory
/// that cannot be read contributes nothing; a missing `objects` directory
/// yields an empty list.
pub fn list_all_objects(ctx: &VoidContext) -> Vec<String> {
    let objects_dir = ctx.paths.void_dir.join("objects");
    let Ok(prefix_dirs) = fs::read_dir(&objects_dir) else {
        return Vec::new();
    };

    let mut names: Vec<String> = prefix_dirs
        .flatten()
        .map(|prefix| prefix.path())
        .filter(|path| path.is_dir())
        .filter_map(|dir| fs::read_dir(&dir).ok())
        .flat_map(|objects| objects.flatten())
        .filter_map(|object| object.file_name().to_str().map(str::to_owned))
        .filter(|name| !name.ends_with(".tmp"))
        .collect();

    // Equal strings are indistinguishable, so an unstable sort gives the
    // same result as a stable one.
    names.sort_unstable();
    names
}
pub fn categorize_object(store: &FsStore, index: &ObjectIndex, cid_str: &str) -> ObjectInfo {
let cid = match void_cid::parse(cid_str) {
Ok(c) => c,
Err(_) => {
return ObjectInfo {
cid: cid_str.to_string(),
object_type: ObjectType::Unknown,
format: Format::Unknown,
encrypted_size: 0,
};
}
};
let object_type = match index.lookup(cid_str) {
IndexedObject::Commit => ObjectType::Commit,
IndexedObject::Metadata(_) => ObjectType::Metadata,
IndexedObject::Manifest(_) => ObjectType::Manifest,
IndexedObject::RepoManifest(_) => ObjectType::RepoManifest,
IndexedObject::Shard(_, _) => ObjectType::Shard,
IndexedObject::Unknown => ObjectType::Unknown,
};
let encrypted_size = store.get_blob::<EncryptedShard>(&cid)
.map(|b| b.as_bytes().len())
.unwrap_or(0);
ObjectInfo {
cid: cid_str.to_string(),
object_type,
format: Format::Unknown,
encrypted_size,
}
}
pub fn audit_object_indexed(
ctx: &VoidContext,
store: &FsStore,
index: &ObjectIndex,
cid_str: &str,
) -> AuditResult {
let cid = match void_cid::parse(cid_str) {
Ok(c) => c,
Err(e) => return AuditResult::Error(format!("Invalid CID: {e}")),
};
match index.lookup(cid_str) {
IndexedObject::Commit => {
match ctx.load_commit(store, &cid) {
Ok((commit, _)) => audit_commit(&commit),
Err(e) => AuditResult::Error(format!("Failed to decrypt commit: {e}")),
}
}
IndexedObject::Metadata(commit_idx) => {
audit_metadata_indexed(ctx, store, commit_idx)
}
IndexedObject::Manifest(commit_idx) => {
audit_manifest_indexed(ctx, store, commit_idx)
}
IndexedObject::RepoManifest(commit_idx) => {
let size = store.get_blob::<EncryptedShard>(&cid)
.map(|b| b.as_bytes().len())
.unwrap_or(0);
AuditResult::RepoManifest(RepoManifestAudit {
encrypted_size: size,
parent_commit: Some(parent_info(commit_idx)),
})
}
IndexedObject::Shard(commit_idx, wrapped_key) => {
let encrypted: EncryptedShard = match store.get_blob(&cid) {
Ok(d) => d,
Err(e) => return AuditResult::Error(format!("Object not found: {e}")),
};
audit_shard_idxed(ctx, store, &encrypted, wrapped_key, commit_idx, &cid)
}
IndexedObject::Unknown => {
AuditResult::Error("Object not found in commit history - may be orphaned".to_string())
}
}
}
/// Summarises an already-loaded commit as an [`AuditResult::Commit`].
///
/// Only the first parent is reported. The metadata reference is shown as a
/// CID string when it converts, otherwise as the hex of its raw bytes.
fn audit_commit(commit: &Commit) -> AuditResult {
    let first_parent = commit
        .parents
        .first()
        .and_then(|p| p.to_void_cid().ok())
        .map(|c| c.to_string());

    let metadata_ref = match commit.metadata_bundle.to_void_cid() {
        Ok(c) => c.to_string(),
        Err(_) => hex::encode(commit.metadata_bundle.as_bytes()),
    };

    let audit = CommitAudit {
        message: commit.message.clone(),
        timestamp: commit.timestamp,
        parent_cid: first_parent,
        metadata_cid: metadata_ref,
        is_signed: commit.is_signed(),
        author: commit.author.map(|a| a.to_hex()),
    };
    AuditResult::Commit(audit)
}
/// Audits the metadata bundle of the commit described by `commit_idx`.
///
/// The bundle is obtained by reloading the owning commit together with its
/// metadata; a bad commit CID or a load failure is reported as
/// [`AuditResult::Error`].
fn audit_metadata_indexed(
    ctx: &VoidContext,
    store: &FsStore,
    commit_idx: &CommitIndex,
) -> AuditResult {
    let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
        Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
        Ok(parsed) => parsed,
    };

    let bundle = match ctx.load_commit_with_metadata(store, &commit_cid) {
        Err(e) => return AuditResult::Error(format!("Failed to load commit+metadata: {e}")),
        Ok((_, bundle, _)) => bundle,
    };

    let audit = MetadataAudit {
        version: bundle.version,
        range_count: bundle.shard_map.ranges.len(),
        shards: extract_shard_refs(&bundle),
        parent_commit: Some(parent_info(commit_idx)),
    };
    AuditResult::Metadata(audit)
}
/// Audits the manifest of the commit described by `commit_idx`.
///
/// Reloads the owning commit and its manifest, then copies out the file
/// entries and per-shard records. Entries that fail to iterate are dropped,
/// and a failure to group entries by shard is treated as "no groups", which
/// makes every shard's `file_count` zero. A bad commit CID, a load failure or
/// a commit without a manifest is reported as [`AuditResult::Error`].
fn audit_manifest_indexed(
    ctx: &VoidContext,
    store: &FsStore,
    commit_idx: &CommitIndex,
) -> AuditResult {
    let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
        Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
        Ok(parsed) => parsed,
    };
    let (commit, reader) = match ctx.load_commit(store, &commit_cid) {
        Err(e) => return AuditResult::Error(format!("Failed to load commit: {e}")),
        Ok(loaded) => loaded,
    };
    let manifest = match ctx.load_manifest(store, &commit, &reader) {
        Err(e) => return AuditResult::Error(format!("Failed to load manifest: {e}")),
        Ok(None) => return AuditResult::Error("Commit has no manifest".to_string()),
        Ok(Some(found)) => found,
    };

    let mut files: Vec<ManifestFileEntry> = Vec::new();
    for entry in manifest.iter().filter_map(Result::ok) {
        files.push(ManifestFileEntry {
            path: entry.path.clone(),
            size: entry.size,
            lines: entry.lines,
            shard_index: entry.shard_index,
        });
    }

    let groups = manifest.entries_by_shard().unwrap_or_default();

    let mut shards: Vec<ManifestShardInfo> = Vec::new();
    for (position, record) in manifest.shards().iter().enumerate() {
        let cid = match void_cid::from_bytes(record.cid.as_bytes()) {
            Ok(c) => c.to_string(),
            Err(_) => hex::encode(record.cid.as_bytes()),
        };
        let file_count = match groups.get(position) {
            Some(group) => group.len(),
            None => 0,
        };
        shards.push(ManifestShardInfo {
            cid,
            size_compressed: record.size_compressed,
            size_decompressed: record.size_decompressed,
            file_count,
        });
    }

    AuditResult::Manifest(ManifestAudit {
        file_count: manifest.total_files(),
        total_bytes: manifest.total_bytes(),
        shard_count: manifest.shards().len(),
        files,
        shards,
        parent_commit: Some(parent_info(commit_idx)),
    })
}
/// Audits one shard belonging to the commit described by `commit_idx`.
///
/// Steps:
/// 1. Reload the owning commit and its manifest.
/// 2. Locate the shard in the manifest by comparing raw CID bytes, and pick
///    the manifest entries grouped under it (none if the shard is not listed).
/// 3. Decrypt `encrypted` with `wrapped_key` plus the ancestor content keys,
///    then decompress it.
///
/// Any failure in these steps is reported as [`AuditResult::Error`].
///
/// The `u32` size/count fields of the result saturate at `u32::MAX` instead
/// of wrapping when the underlying value does not fit. `version` and
/// `dir_count` are always reported as `0`.
fn audit_shard_idxed(
    ctx: &VoidContext,
    store: &FsStore,
    encrypted: &EncryptedShard,
    wrapped_key: Option<&WrappedKey>,
    commit_idx: &CommitIndex,
    shard_cid: &VoidCid,
) -> AuditResult {
    // Narrows an integer to `u32`, clamping to `u32::MAX` rather than
    // silently wrapping the way an `as` cast would.
    fn saturate_u32<T>(value: T) -> u32
    where
        u32: std::convert::TryFrom<T>,
    {
        <u32 as std::convert::TryFrom<T>>::try_from(value).unwrap_or(u32::MAX)
    }

    let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
        Ok(c) => c,
        Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
    };
    let (commit, reader) = match ctx.load_commit(store, &commit_cid) {
        Ok(r) => r,
        Err(e) => return AuditResult::Error(format!("Failed to load commit: {e}")),
    };
    let manifest = match ctx.load_manifest(store, &commit, &reader) {
        Ok(Some(m)) => m,
        Ok(None) => return AuditResult::Error("Commit has no manifest".to_string()),
        Err(e) => return AuditResult::Error(format!("Failed to load manifest: {e}")),
    };

    // Position of this shard among the manifest's shard records, if listed.
    let shard_cid_bytes = shard_cid.to_bytes();
    let shard_idx = manifest
        .shards()
        .iter()
        .position(|s| s.cid.as_bytes() == &shard_cid_bytes);

    let groups = match manifest.entries_by_shard() {
        Ok(g) => g,
        Err(e) => return AuditResult::Error(format!("Failed to group entries: {e}")),
    };

    let ancestor_keys = collect_ancestor_content_keys_vault(&ctx.crypto.vault, store, &commit);
    let decrypted = match reader.decrypt_shard(encrypted, wrapped_key, &ancestor_keys) {
        Ok(d) => d,
        Err(e) => return AuditResult::Error(format!("Failed to decrypt shard: {e}")),
    };
    let body = match decrypted.decompress() {
        Ok(b) => b,
        Err(e) => return AuditResult::Error(format!("Invalid shard: {e}")),
    };

    let shard_ref = shard_idx.and_then(|idx| manifest.shards().get(idx));

    // Build the entry list straight from the borrowed group; an unlisted
    // shard simply has no entries.
    let entries: Vec<ShardEntry> = shard_idx
        .and_then(|idx| groups.get(idx))
        .map(|group| {
            group
                .iter()
                .map(|e| ShardEntry {
                    path: e.path.clone(),
                    size: e.length,
                    lines: e.lines,
                })
                .collect()
        })
        .unwrap_or_default();

    AuditResult::Shard(ShardAudit {
        version: 0,
        entry_count: saturate_u32(entries.len()),
        dir_count: 0,
        body_compressed: shard_ref
            .map(|s| saturate_u32(s.size_compressed))
            .unwrap_or(0),
        body_decompressed: saturate_u32(body.len()),
        entries,
        parent_commit: Some(parent_info(commit_idx)),
    })
}
/// Appends every parent of `commit` to the back of `queue`.
///
/// Parents whose reference is empty or does not convert to a CID are skipped.
fn queue_parents(commit: &Commit, queue: &mut VecDeque<VoidCid>) {
    queue.extend(
        commit
            .parents
            .iter()
            .filter(|parent| !parent.as_bytes().is_empty())
            .filter_map(|parent| parent.to_void_cid().ok()),
    );
}
fn parent_info(idx: &CommitIndex) -> ParentCommitInfo {
ParentCommitInfo {
cid: idx.commit_cid.clone(),
message: idx.message.clone(),
timestamp: idx.timestamp,
}
}
/// Collects a [`ShardRef`] for every shard-map range in `bundle` that
/// carries a CID, in range order.
///
/// The CID is rendered as a CID string when it converts, otherwise as the hex
/// of its raw bytes. `shard_id` is narrowed to `u32` with an `as` cast.
fn extract_shard_refs(bundle: &MetadataBundle) -> Vec<ShardRef> {
    let mut refs = Vec::new();
    for range in bundle.shard_map.ranges.iter() {
        let Some(shard_cid_typed) = range.cid.as_ref() else {
            continue;
        };
        let cid = match void_cid::from_bytes(shard_cid_typed.as_bytes()) {
            Ok(c) => c.to_string(),
            Err(_) => hex::encode(shard_cid_typed.as_bytes()),
        };
        refs.push(ShardRef {
            shard_id: range.shard_id as u32,
            cid,
        });
    }
    refs
}