Skip to main content

void_audit_tui/
void_backend.rs

1//! Void repository backend for audit TUI.
2//!
3//! Provides object enumeration, indexing, and audit using `VoidContext`.
4
5use std::collections::VecDeque;
6use std::fs;
7
8use thiserror::Error;
9use void_core::{
10    cid as void_cid,
11    cid::VoidCid,
12    collab::WrappedKey,
13    crypto::{
14        reader::collect_ancestor_content_keys_vault,
15        EncryptedShard,
16    },
17    metadata::{Commit, MetadataBundle},
18    support::ToVoidCid,
19    store::{FsStore, ObjectStoreExt},
20    VoidContext,
21};
22
/// Error type for void backend operations.
///
/// Both variants are produced through `#[from]` conversions, so the `?`
/// operator can lift the underlying error into this type directly.
#[derive(Debug, Error)]
pub enum VoidBackendError {
    /// Wraps a [`std::io::Error`]; displayed as `io error: <inner>`.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),

    /// Wraps a [`void_core::VoidError`]; displayed as `void error: <inner>`.
    #[error("void error: {0}")]
    Void(#[from] void_core::VoidError),
}
32
/// Result type for void backend operations.
///
/// Shorthand for [`std::result::Result`] with the error type fixed to
/// [`VoidBackendError`]. Note that this alias shadows the prelude `Result`
/// inside this module, so a two-parameter result must be spelled
/// `std::result::Result<T, E>`.
pub type Result<T> = std::result::Result<T, VoidBackendError>;
35
/// Kind of object stored in a void repository.
///
/// `Unknown` is used for objects that could not be classified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectType {
    Commit,
    Metadata,
    Manifest,
    RepoManifest,
    Shard,
    Unknown,
}

impl ObjectType {
    /// Lower-case, hyphenated label for this object type.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Commit => "commit",
            Self::Metadata => "metadata",
            Self::Manifest => "manifest",
            Self::RepoManifest => "repo-manifest",
            Self::Shard => "shard",
            Self::Unknown => "unknown",
        }
    }
}
59
/// Encryption format, derived from the AAD used during encryption.
///
/// `Unknown` stands for any object whose format was not determined.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    CommitV1,
    ShardV1,
    MetadataV1,
    ManifestV1,
    RepoManifestV1,
    Unknown,
}

impl Format {
    /// Label of the form `<kind>/v1`, or `"unknown"` for [`Format::Unknown`].
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::CommitV1 => "commit/v1",
            Self::ShardV1 => "shard/v1",
            Self::MetadataV1 => "metadata/v1",
            Self::ManifestV1 => "manifest/v1",
            Self::RepoManifestV1 => "repo-manifest/v1",
            Self::Unknown => "unknown",
        }
    }

    /// `true` for every variant except [`Format::Unknown`].
    pub fn is_known(&self) -> bool {
        *self != Self::Unknown
    }
}
88
/// Information about a repository object.
///
/// Produced by `categorize_object` for each CID string it is given.
#[derive(Debug, Clone)]
pub struct ObjectInfo {
    /// CID string exactly as it was passed in (not re-encoded or normalized).
    pub cid: String,
    /// Type resolved through the object index; `Unknown` when the CID does
    /// not parse or is not referenced by any indexed commit.
    pub object_type: ObjectType,
    /// Encryption format. `categorize_object` currently always sets this to
    /// `Format::Unknown`.
    pub format: Format,
    /// Byte length of the stored blob; `0` when the CID does not parse or the
    /// blob cannot be read from the store.
    pub encrypted_size: usize,
}
97
98impl ObjectInfo {
99    /// Return first 12 chars of CID for display.
100    pub fn short_cid(&self) -> &str {
101        &self.cid[..self.cid.len().min(12)]
102    }
103}
104
/// Detailed audit result for a commit.
#[derive(Debug, Clone)]
pub struct CommitAudit {
    /// Commit message, copied from the commit.
    pub message: String,
    /// Commit timestamp as stored in the commit (unit not visible in this file).
    pub timestamp: u64,
    /// CID string of the first parent only; `None` when the commit has no
    /// parents or the first parent's CID cannot be converted.
    pub parent_cid: Option<String>,
    /// CID string of the metadata bundle, or a hex dump of its raw bytes when
    /// the CID conversion fails.
    pub metadata_cid: String,
    /// Result of `Commit::is_signed()`.
    pub is_signed: bool,
    /// Hex form of the commit author, when the commit carries one.
    pub author: Option<String>,
}
115
/// Detailed audit result for metadata.
#[derive(Debug, Clone)]
pub struct MetadataAudit {
    /// `version` field of the loaded metadata bundle.
    pub version: u32,
    /// Number of ranges in the bundle's shard map, including ranges that have
    /// no shard CID.
    pub range_count: usize,
    /// One reference per shard-map range that has a CID.
    pub shards: Vec<ShardRef>,
    /// Commit through which this metadata was found in the index.
    pub parent_commit: Option<ParentCommitInfo>,
}
124
/// Reference to a shard.
#[derive(Debug, Clone)]
pub struct ShardRef {
    /// `shard_id` of the shard-map range, narrowed to `u32`.
    pub shard_id: u32,
    /// CID string of the shard, or a hex dump of the raw bytes when they do
    /// not decode as a CID.
    pub cid: String,
}
131
/// Detailed audit result for a shard.
#[derive(Debug, Clone)]
pub struct ShardAudit {
    /// Shard format version; the shard audit in this file always reports `0`
    /// ("headerless format").
    pub version: u32,
    /// Number of items in `entries`.
    pub entry_count: u32,
    /// Directory count; the shard audit in this file always reports `0`.
    pub dir_count: u32,
    /// Compressed size recorded for the shard in the tree manifest, narrowed
    /// to `u32`; `0` when the shard is not listed in the manifest.
    pub body_compressed: u32,
    /// Length in bytes of the body after decrypting and decompressing.
    pub body_decompressed: u32,
    /// Files the tree manifest assigns to this shard.
    pub entries: Vec<ShardEntry>,
    /// Commit through which this shard was found in the index.
    pub parent_commit: Option<ParentCommitInfo>,
}
143
/// A file entry in a shard.
#[derive(Debug, Clone)]
pub struct ShardEntry {
    /// File path as recorded in the tree manifest.
    pub path: String,
    /// Taken from the manifest entry's `length` field.
    pub size: u64,
    /// Line count as recorded in the tree manifest.
    pub lines: u32,
}
151
/// Parent commit information.
///
/// A small copy of the identifying fields of a `CommitIndex`, attached to
/// audit results to show which commit references the audited object.
#[derive(Debug, Clone)]
pub struct ParentCommitInfo {
    /// CID string of the referencing commit.
    pub cid: String,
    /// Message of the referencing commit.
    pub message: String,
    /// Timestamp of the referencing commit (unit not visible in this file).
    pub timestamp: u64,
}
159
/// A file entry in the tree manifest.
///
/// All fields are copied from the corresponding manifest entry.
#[derive(Debug, Clone)]
pub struct ManifestFileEntry {
    /// File path.
    pub path: String,
    /// The entry's `size` field.
    pub size: u64,
    /// The entry's `lines` field.
    pub lines: u32,
    /// The entry's `shard_index` field.
    // presumably an index into the manifest's shard list — TODO confirm
    pub shard_index: u32,
}
168
/// A shard summary in the tree manifest.
#[derive(Debug, Clone)]
pub struct ManifestShardInfo {
    /// CID string of the shard, or a hex dump of the raw bytes when they do
    /// not decode as a CID.
    pub cid: String,
    /// Compressed size as recorded in the manifest.
    pub size_compressed: u64,
    /// Decompressed size as recorded in the manifest.
    pub size_decompressed: u64,
    /// Number of manifest entries grouped under this shard; `0` when the
    /// grouping is unavailable or has no group at this shard's position.
    pub file_count: usize,
}
177
/// Detailed audit result for a tree manifest.
#[derive(Debug, Clone)]
pub struct ManifestAudit {
    /// Value of the manifest's `total_files()`.
    pub file_count: u64,
    /// Value of the manifest's `total_bytes()`.
    pub total_bytes: u64,
    /// Number of shards listed in the manifest.
    pub shard_count: usize,
    /// Entries that iterated successfully; entries that fail to decode are
    /// skipped, so this may be shorter than `file_count`.
    pub files: Vec<ManifestFileEntry>,
    /// One summary per shard listed in the manifest, in manifest order.
    pub shards: Vec<ManifestShardInfo>,
    /// Commit through which this manifest was found in the index.
    pub parent_commit: Option<ParentCommitInfo>,
}
188
/// Detailed audit result for a repo manifest (contributors.json).
///
/// The repo manifest is not decrypted by the audit in this file; only its
/// stored size is reported.
#[derive(Debug, Clone)]
pub struct RepoManifestAudit {
    /// Byte length of the stored blob; `0` when it cannot be read.
    pub encrypted_size: usize,
    /// Commit through which this repo manifest was found in the index.
    pub parent_commit: Option<ParentCommitInfo>,
}
195
/// Full audit result for any object type.
///
/// Audit failures are reported in-band through the `Error` variant rather
/// than through a `Result`.
#[derive(Debug, Clone)]
pub enum AuditResult {
    /// Details of a commit object.
    Commit(CommitAudit),
    /// Details of a metadata bundle.
    Metadata(MetadataAudit),
    /// Details of a tree manifest.
    Manifest(ManifestAudit),
    /// Details of a repo manifest.
    RepoManifest(RepoManifestAudit),
    /// Details of a shard.
    Shard(ShardAudit),
    /// Human-readable description of why the audit could not be completed.
    Error(String),
}
206
/// Cached info about a commit's referenced objects.
#[derive(Debug, Clone)]
pub struct CommitIndex {
    /// CID string of the commit itself.
    pub commit_cid: String,
    /// Commit message.
    pub message: String,
    /// Commit timestamp (unit not visible in this file).
    pub timestamp: u64,
    /// CID string of the commit's metadata bundle; empty when the CID could
    /// not be converted.
    pub metadata_cid: String,
    /// `(shard CID string, optional wrapped key)` for every shard-map range
    /// that carries a decodable CID.
    pub shard_cids: Vec<(String, Option<WrappedKey>)>,
}
216
/// What type of object an indexed CID refers to.
///
/// Returned by `ObjectIndex::lookup`; the borrowed data lives as long as the
/// index it was looked up in.
pub enum IndexedObject<'a> {
    /// The CID is a commit that was visited while building the index.
    Commit,
    /// The CID is a metadata bundle referenced by the given commit.
    Metadata(&'a CommitIndex),
    /// The CID is a tree manifest referenced by the given commit.
    Manifest(&'a CommitIndex),
    /// The CID is a repo manifest referenced by the given commit.
    RepoManifest(&'a CommitIndex),
    /// The CID is a shard referenced by the given commit, together with the
    /// wrapped key recorded for it (if any).
    Shard(&'a CommitIndex, Option<&'a WrappedKey>),
    /// The CID is not present in the index.
    Unknown,
}
226
/// Pre-built index mapping object CIDs to their parent commits.
///
/// All keys are CID strings. When several commits reference the same object,
/// each map keeps only the entry inserted last.
#[derive(Debug, Default)]
pub struct ObjectIndex {
    /// Metadata bundle CID -> commit that references it.
    pub metadata_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
    /// Tree manifest CID -> commit that references it.
    pub manifest_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
    /// Repo manifest CID -> commit that references it.
    pub repo_manifest_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
    /// Shard CID -> (commit that references it, wrapped key recorded for the
    /// shard in that commit's metadata).
    pub shard_to_commit: rustc_hash::FxHashMap<String, (CommitIndex, Option<WrappedKey>)>,
    /// CIDs of every commit visited during index building, including commits
    /// whose metadata could not be loaded.
    pub commit_cids: rustc_hash::FxHashSet<String>,
}
236
237impl ObjectIndex {
238    /// Look up a CID and return its known type with associated context.
239    pub fn lookup(&self, cid_str: &str) -> IndexedObject<'_> {
240        if self.commit_cids.contains(cid_str) {
241            return IndexedObject::Commit;
242        }
243        if let Some(idx) = self.metadata_to_commit.get(cid_str) {
244            return IndexedObject::Metadata(idx);
245        }
246        if let Some(idx) = self.manifest_to_commit.get(cid_str) {
247            return IndexedObject::Manifest(idx);
248        }
249        if let Some(idx) = self.repo_manifest_to_commit.get(cid_str) {
250            return IndexedObject::RepoManifest(idx);
251        }
252        if let Some((idx, wrapped_key)) = self.shard_to_commit.get(cid_str) {
253            return IndexedObject::Shard(idx, wrapped_key.as_ref());
254        }
255        IndexedObject::Unknown
256    }
257}
258
259// ---------------------------------------------------------------------------
260// Index building
261// ---------------------------------------------------------------------------
262
263/// Build an index by walking commit history once using BFS.
264///
265/// Starts from HEAD and all branch heads, correctly handling merge commits.
266pub fn build_index(ctx: &VoidContext, store: &FsStore, max_commits: usize) -> Result<ObjectIndex> {
267    let mut index = ObjectIndex::default();
268    let mut visited: rustc_hash::FxHashSet<String> = rustc_hash::FxHashSet::default();
269    let mut queue: VecDeque<VoidCid> = VecDeque::new();
270
271    // Start from HEAD
272    if let Ok(Some(head_cid)) = ctx.resolve_head() {
273        if let Ok(cid) = void_cid::from_bytes(head_cid.as_bytes()) {
274            queue.push_back(cid);
275        }
276    }
277
278    // Also add all branch heads for complete coverage
279    let refs_dir = ctx.paths.void_dir.join("refs/heads");
280    if refs_dir.exists() {
281        if let Ok(entries) = fs::read_dir(&refs_dir) {
282            for entry in entries.flatten() {
283                if let Ok(content) = fs::read_to_string(entry.path()) {
284                    if let Ok(branch_cid) = void_cid::parse(content.trim()) {
285                        queue.push_back(branch_cid);
286                    }
287                }
288            }
289        }
290    }
291
292    let mut commits_processed = 0;
293
294    while let Some(commit_cid) = queue.pop_front() {
295        let commit_cid_str = commit_cid.to_string();
296
297        if visited.contains(&commit_cid_str) {
298            continue;
299        }
300        visited.insert(commit_cid_str.clone());
301
302        if commits_processed >= max_commits {
303            continue;
304        }
305        commits_processed += 1;
306
307        index.commit_cids.insert(commit_cid_str.clone());
308
309        // Load commit + metadata in one call
310        let (commit, bundle, _reader) = match ctx.load_commit_with_metadata(store, &commit_cid) {
311            Ok(r) => r,
312            Err(_) => {
313                // Still need to try loading just the commit to queue parents
314                if let Ok((commit, _)) = ctx.load_commit(store, &commit_cid) {
315                    queue_parents(&commit, &mut queue);
316                }
317                continue;
318            }
319        };
320
321        let metadata_cid_str = commit.metadata_bundle.to_void_cid()
322            .map(|c| c.to_string())
323            .unwrap_or_default();
324
325        // Collect shard CIDs from metadata
326        let mut shard_cids = Vec::new();
327        for range in &bundle.shard_map.ranges {
328            if let Some(ref shard_cid_typed) = range.cid {
329                if let Ok(shard_cid) = void_cid::from_bytes(shard_cid_typed.as_bytes()) {
330                    shard_cids.push((shard_cid.to_string(), range.wrapped_key.clone()));
331                }
332            }
333        }
334
335        let commit_index = CommitIndex {
336            commit_cid: commit_cid_str,
337            message: commit.message.clone(),
338            timestamp: commit.timestamp,
339            metadata_cid: metadata_cid_str.clone(),
340            shard_cids: shard_cids.clone(),
341        };
342
343        index.metadata_to_commit.insert(metadata_cid_str, commit_index.clone());
344
345        for (shard_cid_str, wrapped_key) in shard_cids {
346            index.shard_to_commit.insert(shard_cid_str, (commit_index.clone(), wrapped_key));
347        }
348
349        if let Some(ref manifest_cid_bytes) = commit.manifest_cid {
350            if let Ok(manifest_cid) = manifest_cid_bytes.to_void_cid() {
351                index.manifest_to_commit.insert(manifest_cid.to_string(), commit_index.clone());
352            }
353        }
354
355        if let Some(ref rm_cid_bytes) = commit.repo_manifest_cid {
356            if let Ok(rm_cid) = rm_cid_bytes.to_void_cid() {
357                index.repo_manifest_to_commit.insert(rm_cid.to_string(), commit_index.clone());
358            }
359        }
360
361        queue_parents(&commit, &mut queue);
362    }
363
364    Ok(index)
365}
366
367// ---------------------------------------------------------------------------
368// Object listing and categorization
369// ---------------------------------------------------------------------------
370
371/// List all object CIDs in the repository.
372pub fn list_all_objects(ctx: &VoidContext) -> Vec<String> {
373    let objects_dir = ctx.paths.void_dir.join("objects");
374    let mut cids = Vec::new();
375
376    let Ok(prefixes) = fs::read_dir(&objects_dir) else {
377        return cids;
378    };
379
380    for prefix_entry in prefixes.flatten() {
381        let prefix_path = prefix_entry.path();
382        if !prefix_path.is_dir() {
383            continue;
384        }
385
386        let Ok(objects) = fs::read_dir(&prefix_path) else {
387            continue;
388        };
389
390        for obj_entry in objects.flatten() {
391            if let Some(name) = obj_entry.file_name().to_str() {
392                if !name.ends_with(".tmp") {
393                    cids.push(name.to_string());
394                }
395            }
396        }
397    }
398
399    cids.sort();
400    cids
401}
402
403/// Categorize an object using the pre-built index.
404pub fn categorize_object(store: &FsStore, index: &ObjectIndex, cid_str: &str) -> ObjectInfo {
405    let cid = match void_cid::parse(cid_str) {
406        Ok(c) => c,
407        Err(_) => {
408            return ObjectInfo {
409                cid: cid_str.to_string(),
410                object_type: ObjectType::Unknown,
411                format: Format::Unknown,
412                encrypted_size: 0,
413            };
414        }
415    };
416
417    let object_type = match index.lookup(cid_str) {
418        IndexedObject::Commit => ObjectType::Commit,
419        IndexedObject::Metadata(_) => ObjectType::Metadata,
420        IndexedObject::Manifest(_) => ObjectType::Manifest,
421        IndexedObject::RepoManifest(_) => ObjectType::RepoManifest,
422        IndexedObject::Shard(_, _) => ObjectType::Shard,
423        IndexedObject::Unknown => ObjectType::Unknown,
424    };
425
426    let encrypted_size = store.get_blob::<EncryptedShard>(&cid)
427        .map(|b| b.as_bytes().len())
428        .unwrap_or(0);
429
430    ObjectInfo {
431        cid: cid_str.to_string(),
432        object_type,
433        format: Format::Unknown,
434        encrypted_size,
435    }
436}
437
438// ---------------------------------------------------------------------------
439// Audit functions
440// ---------------------------------------------------------------------------
441
442/// Audit an object using the pre-built index.
443pub fn audit_object_indexed(
444    ctx: &VoidContext,
445    store: &FsStore,
446    index: &ObjectIndex,
447    cid_str: &str,
448) -> AuditResult {
449    let cid = match void_cid::parse(cid_str) {
450        Ok(c) => c,
451        Err(e) => return AuditResult::Error(format!("Invalid CID: {e}")),
452    };
453
454    match index.lookup(cid_str) {
455        IndexedObject::Commit => {
456            match ctx.load_commit(store, &cid) {
457                Ok((commit, _)) => audit_commit(&commit),
458                Err(e) => AuditResult::Error(format!("Failed to decrypt commit: {e}")),
459            }
460        }
461        IndexedObject::Metadata(commit_idx) => {
462            audit_metadata_indexed(ctx, store, commit_idx)
463        }
464        IndexedObject::Manifest(commit_idx) => {
465            audit_manifest_indexed(ctx, store, commit_idx)
466        }
467        IndexedObject::RepoManifest(commit_idx) => {
468            let size = store.get_blob::<EncryptedShard>(&cid)
469                .map(|b| b.as_bytes().len())
470                .unwrap_or(0);
471            AuditResult::RepoManifest(RepoManifestAudit {
472                encrypted_size: size,
473                parent_commit: Some(parent_info(commit_idx)),
474            })
475        }
476        IndexedObject::Shard(commit_idx, wrapped_key) => {
477            let encrypted: EncryptedShard = match store.get_blob(&cid) {
478                Ok(d) => d,
479                Err(e) => return AuditResult::Error(format!("Object not found: {e}")),
480            };
481            audit_shard_idxed(ctx, store, &encrypted, wrapped_key, commit_idx, &cid)
482        }
483        IndexedObject::Unknown => {
484            AuditResult::Error("Object not found in commit history - may be orphaned".to_string())
485        }
486    }
487}
488
489/// Audit a commit object.
490fn audit_commit(commit: &Commit) -> AuditResult {
491    let parent_cid = commit.parents.first().and_then(|p| {
492        p.to_void_cid().map(|c| c.to_string()).ok()
493    });
494
495    let metadata_cid = commit.metadata_bundle.to_void_cid()
496        .map(|c| c.to_string())
497        .unwrap_or_else(|_| hex::encode(commit.metadata_bundle.as_bytes()));
498
499    AuditResult::Commit(CommitAudit {
500        message: commit.message.clone(),
501        timestamp: commit.timestamp,
502        parent_cid,
503        metadata_cid,
504        is_signed: commit.is_signed(),
505        author: commit.author.map(|a| a.to_hex()),
506    })
507}
508
509/// Audit metadata using indexed commit info.
510fn audit_metadata_indexed(
511    ctx: &VoidContext,
512    store: &FsStore,
513    commit_idx: &CommitIndex,
514) -> AuditResult {
515    let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
516        Ok(c) => c,
517        Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
518    };
519
520    let (_, metadata, _) = match ctx.load_commit_with_metadata(store, &commit_cid) {
521        Ok(r) => r,
522        Err(e) => return AuditResult::Error(format!("Failed to load commit+metadata: {e}")),
523    };
524
525    let shards = extract_shard_refs(&metadata);
526
527    AuditResult::Metadata(MetadataAudit {
528        version: metadata.version,
529        range_count: metadata.shard_map.ranges.len(),
530        shards,
531        parent_commit: Some(parent_info(commit_idx)),
532    })
533}
534
535/// Audit tree manifest using indexed commit info.
536fn audit_manifest_indexed(
537    ctx: &VoidContext,
538    store: &FsStore,
539    commit_idx: &CommitIndex,
540) -> AuditResult {
541    let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
542        Ok(c) => c,
543        Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
544    };
545
546    let (commit, reader) = match ctx.load_commit(store, &commit_cid) {
547        Ok(r) => r,
548        Err(e) => return AuditResult::Error(format!("Failed to load commit: {e}")),
549    };
550
551    let manifest = match ctx.load_manifest(store, &commit, &reader) {
552        Ok(Some(m)) => m,
553        Ok(None) => return AuditResult::Error("Commit has no manifest".to_string()),
554        Err(e) => return AuditResult::Error(format!("Failed to load manifest: {e}")),
555    };
556
557    // Extract file entries
558    let files: Vec<ManifestFileEntry> = manifest.iter()
559        .filter_map(|r| r.ok())
560        .map(|e| ManifestFileEntry {
561            path: e.path.clone(),
562            size: e.size,
563            lines: e.lines,
564            shard_index: e.shard_index,
565        })
566        .collect();
567
568    // Extract shard info with file counts
569    let groups = manifest.entries_by_shard().unwrap_or_default();
570    let shards: Vec<ManifestShardInfo> = manifest.shards().iter().enumerate()
571        .map(|(i, s)| {
572            let cid_str = void_cid::from_bytes(s.cid.as_bytes())
573                .map(|c| c.to_string())
574                .unwrap_or_else(|_| hex::encode(s.cid.as_bytes()));
575            ManifestShardInfo {
576                cid: cid_str,
577                size_compressed: s.size_compressed,
578                size_decompressed: s.size_decompressed,
579                file_count: groups.get(i).map(|g| g.len()).unwrap_or(0),
580            }
581        })
582        .collect();
583
584    AuditResult::Manifest(ManifestAudit {
585        file_count: manifest.total_files(),
586        total_bytes: manifest.total_bytes(),
587        shard_count: manifest.shards().len(),
588        files,
589        shards,
590        parent_commit: Some(parent_info(commit_idx)),
591    })
592}
593
594/// Audit shard using indexed commit info and TreeManifest.
595fn audit_shard_idxed(
596    ctx: &VoidContext,
597    store: &FsStore,
598    encrypted: &EncryptedShard,
599    wrapped_key: Option<&WrappedKey>,
600    commit_idx: &CommitIndex,
601    shard_cid: &VoidCid,
602) -> AuditResult {
603    let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
604        Ok(c) => c,
605        Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
606    };
607
608    let (commit, reader) = match ctx.load_commit(store, &commit_cid) {
609        Ok(r) => r,
610        Err(e) => return AuditResult::Error(format!("Failed to load commit: {e}")),
611    };
612
613    // Load manifest to get file entries for this shard
614    let manifest = match ctx.load_manifest(store, &commit, &reader) {
615        Ok(Some(m)) => m,
616        Ok(None) => return AuditResult::Error("Commit has no manifest".to_string()),
617        Err(e) => return AuditResult::Error(format!("Failed to load manifest: {e}")),
618    };
619
620    // Find shard index by matching CID against manifest shards
621    let shard_cid_bytes = shard_cid.to_bytes();
622    let shard_idx = manifest.shards().iter().position(|s| s.cid.as_bytes() == &shard_cid_bytes);
623
624    let groups = match manifest.entries_by_shard() {
625        Ok(g) => g,
626        Err(e) => return AuditResult::Error(format!("Failed to group entries: {e}")),
627    };
628
629    let manifest_entries = shard_idx
630        .and_then(|idx| groups.get(idx))
631        .cloned()
632        .unwrap_or_default();
633
634    // Decrypt and decompress to validate shard integrity
635    let ancestor_keys = collect_ancestor_content_keys_vault(&ctx.crypto.vault, store, &commit);
636    let decrypted = match reader.decrypt_shard(encrypted, wrapped_key, &ancestor_keys) {
637        Ok(d) => d,
638        Err(e) => return AuditResult::Error(format!("Failed to decrypt shard: {e}")),
639    };
640
641    let body = match decrypted.decompress() {
642        Ok(b) => b,
643        Err(e) => return AuditResult::Error(format!("Invalid shard: {e}")),
644    };
645
646    let shard_ref = shard_idx.and_then(|idx| manifest.shards().get(idx));
647
648    let entries: Vec<ShardEntry> = manifest_entries
649        .iter()
650        .map(|e| ShardEntry {
651            path: e.path.clone(),
652            size: e.length,
653            lines: e.lines,
654        })
655        .collect();
656
657    AuditResult::Shard(ShardAudit {
658        version: 0, // headerless format
659        entry_count: entries.len() as u32,
660        dir_count: 0,
661        body_compressed: shard_ref.map(|s| s.size_compressed as u32).unwrap_or(0),
662        body_decompressed: body.len() as u32,
663        entries,
664        parent_commit: Some(parent_info(commit_idx)),
665    })
666}
667
668// ---------------------------------------------------------------------------
669// Helpers
670// ---------------------------------------------------------------------------
671
672/// Queue all parent CIDs from a commit into the BFS queue.
673fn queue_parents(commit: &Commit, queue: &mut VecDeque<VoidCid>) {
674    for parent_bytes in &commit.parents {
675        if !parent_bytes.as_bytes().is_empty() {
676            if let Ok(parent_cid) = parent_bytes.to_void_cid() {
677                queue.push_back(parent_cid);
678            }
679        }
680    }
681}
682
683/// Build a ParentCommitInfo from a CommitIndex.
684fn parent_info(idx: &CommitIndex) -> ParentCommitInfo {
685    ParentCommitInfo {
686        cid: idx.commit_cid.clone(),
687        message: idx.message.clone(),
688        timestamp: idx.timestamp,
689    }
690}
691
692/// Extract shard references from a metadata bundle.
693fn extract_shard_refs(bundle: &MetadataBundle) -> Vec<ShardRef> {
694    bundle
695        .shard_map
696        .ranges
697        .iter()
698        .filter_map(|r| {
699            r.cid.as_ref().map(|shard_cid_typed| {
700                let cid_str = void_cid::from_bytes(shard_cid_typed.as_bytes())
701                    .map(|c| c.to_string())
702                    .unwrap_or_else(|_| hex::encode(shard_cid_typed.as_bytes()));
703                ShardRef {
704                    shard_id: r.shard_id as u32,
705                    cid: cid_str,
706                }
707            })
708        })
709        .collect()
710}