1use std::collections::VecDeque;
6use std::fs;
7
8use thiserror::Error;
9use void_core::{
10 cid as void_cid,
11 cid::VoidCid,
12 collab::WrappedKey,
13 crypto::{
14 reader::collect_ancestor_content_keys_vault,
15 EncryptedShard,
16 },
17 metadata::{Commit, MetadataBundle},
18 support::ToVoidCid,
19 store::{FsStore, ObjectStoreExt},
20 VoidContext,
21};
22
23#[derive(Debug, Error)]
25pub enum VoidBackendError {
26 #[error("io error: {0}")]
27 Io(#[from] std::io::Error),
28
29 #[error("void error: {0}")]
30 Void(#[from] void_core::VoidError),
31}
32
33pub type Result<T> = std::result::Result<T, VoidBackendError>;
35
/// Kind of object stored in the object store, as attributed through the
/// commit index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectType {
    /// A commit object.
    Commit,
    /// A metadata bundle referenced by a commit.
    Metadata,
    /// A manifest referenced by a commit.
    Manifest,
    /// A repo manifest referenced by a commit.
    RepoManifest,
    /// A shard referenced from a commit's shard map.
    Shard,
    /// Anything that could not be attributed to an indexed commit.
    Unknown,
}
46
47impl ObjectType {
48 pub fn as_str(&self) -> &'static str {
49 match self {
50 ObjectType::Commit => "commit",
51 ObjectType::Metadata => "metadata",
52 ObjectType::Manifest => "manifest",
53 ObjectType::RepoManifest => "repo-manifest",
54 ObjectType::Shard => "shard",
55 ObjectType::Unknown => "unknown",
56 }
57 }
58}
59
/// Format tag attached to an object.
///
/// Each recognised tag has a matching `<kind>/v1` label (see `as_str`);
/// `Unknown` is the tag for anything unrecognised.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Labelled `commit/v1`.
    CommitV1,
    /// Labelled `shard/v1`.
    ShardV1,
    /// Labelled `metadata/v1`.
    MetadataV1,
    /// Labelled `manifest/v1`.
    ManifestV1,
    /// Labelled `repo-manifest/v1`.
    RepoManifestV1,
    /// No format could be determined.
    Unknown,
}
70
71impl Format {
72 pub fn as_str(&self) -> &'static str {
73 match self {
74 Format::CommitV1 => "commit/v1",
75 Format::ShardV1 => "shard/v1",
76 Format::MetadataV1 => "metadata/v1",
77 Format::ManifestV1 => "manifest/v1",
78 Format::RepoManifestV1 => "repo-manifest/v1",
79 Format::Unknown => "unknown",
80 }
81 }
82
83 pub fn is_known(&self) -> bool {
85 !matches!(self, Format::Unknown)
86 }
87}
88
89#[derive(Debug, Clone)]
91pub struct ObjectInfo {
92 pub cid: String,
93 pub object_type: ObjectType,
94 pub format: Format,
95 pub encrypted_size: usize,
96}
97
98impl ObjectInfo {
99 pub fn short_cid(&self) -> &str {
101 &self.cid[..self.cid.len().min(12)]
102 }
103}
104
/// Audit view of a commit, as filled in by `audit_commit`.
#[derive(Debug, Clone)]
pub struct CommitAudit {
    /// The commit message.
    pub message: String,
    /// The commit's timestamp value, copied unchanged.
    pub timestamp: u64,
    /// CID string of the first parent only; `None` when there is no parent
    /// or its CID could not be converted.
    pub parent_cid: Option<String>,
    /// CID string of the metadata bundle, or a hex dump of its raw bytes
    /// when it cannot be converted to a CID.
    pub metadata_cid: String,
    /// Whether the commit reports itself as signed.
    pub is_signed: bool,
    /// Hex rendering of the commit author, when one is present.
    pub author: Option<String>,
}
115
116#[derive(Debug, Clone)]
118pub struct MetadataAudit {
119 pub version: u32,
120 pub range_count: usize,
121 pub shards: Vec<ShardRef>,
122 pub parent_commit: Option<ParentCommitInfo>,
123}
124
/// Reference to one shard listed in a metadata bundle's shard map.
#[derive(Debug, Clone)]
pub struct ShardRef {
    /// The range's shard id, narrowed to `u32`.
    pub shard_id: u32,
    /// CID string of the shard, or a hex dump of its raw bytes when it
    /// cannot be converted to a CID.
    pub cid: String,
}
131
132#[derive(Debug, Clone)]
134pub struct ShardAudit {
135 pub version: u32,
136 pub entry_count: u32,
137 pub dir_count: u32,
138 pub body_compressed: u32,
139 pub body_decompressed: u32,
140 pub entries: Vec<ShardEntry>,
141 pub parent_commit: Option<ParentCommitInfo>,
142}
143
/// One file entry attributed to a shard.
#[derive(Debug, Clone)]
pub struct ShardEntry {
    /// Path of the file.
    pub path: String,
    /// The manifest entry's `length` value.
    pub size: u64,
    /// The manifest entry's `lines` value.
    pub lines: u32,
}
151
/// Short description of the commit an audited object belongs to, copied
/// from that commit's index record.
#[derive(Debug, Clone)]
pub struct ParentCommitInfo {
    /// CID string of the commit.
    pub cid: String,
    /// The commit message.
    pub message: String,
    /// The commit's timestamp value, copied unchanged.
    pub timestamp: u64,
}
159
/// One file listed in a manifest.
#[derive(Debug, Clone)]
pub struct ManifestFileEntry {
    /// Path of the file.
    pub path: String,
    /// The manifest entry's `size` value.
    pub size: u64,
    /// The manifest entry's `lines` value.
    pub lines: u32,
    /// The manifest entry's `shard_index` value.
    pub shard_index: u32,
}
168
/// Per-shard record taken from a manifest.
#[derive(Debug, Clone)]
pub struct ManifestShardInfo {
    /// CID string of the shard, or a hex dump of its raw bytes when it
    /// cannot be converted to a CID.
    pub cid: String,
    /// The shard record's `size_compressed` value.
    pub size_compressed: u64,
    /// The shard record's `size_decompressed` value.
    pub size_decompressed: u64,
    /// Number of manifest entries grouped under this shard; `0` when no
    /// group exists for it.
    pub file_count: usize,
}
177
178#[derive(Debug, Clone)]
180pub struct ManifestAudit {
181 pub file_count: u64,
182 pub total_bytes: u64,
183 pub shard_count: usize,
184 pub files: Vec<ManifestFileEntry>,
185 pub shards: Vec<ManifestShardInfo>,
186 pub parent_commit: Option<ParentCommitInfo>,
187}
188
189#[derive(Debug, Clone)]
191pub struct RepoManifestAudit {
192 pub encrypted_size: usize,
193 pub parent_commit: Option<ParentCommitInfo>,
194}
195
196#[derive(Debug, Clone)]
198pub enum AuditResult {
199 Commit(CommitAudit),
200 Metadata(MetadataAudit),
201 Manifest(ManifestAudit),
202 RepoManifest(RepoManifestAudit),
203 Shard(ShardAudit),
204 Error(String),
205}
206
207#[derive(Debug, Clone)]
209pub struct CommitIndex {
210 pub commit_cid: String,
211 pub message: String,
212 pub timestamp: u64,
213 pub metadata_cid: String,
214 pub shard_cids: Vec<(String, Option<WrappedKey>)>,
215}
216
217pub enum IndexedObject<'a> {
219 Commit,
220 Metadata(&'a CommitIndex),
221 Manifest(&'a CommitIndex),
222 RepoManifest(&'a CommitIndex),
223 Shard(&'a CommitIndex, Option<&'a WrappedKey>),
224 Unknown,
225}
226
227#[derive(Debug, Default)]
229pub struct ObjectIndex {
230 pub metadata_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
231 pub manifest_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
232 pub repo_manifest_to_commit: rustc_hash::FxHashMap<String, CommitIndex>,
233 pub shard_to_commit: rustc_hash::FxHashMap<String, (CommitIndex, Option<WrappedKey>)>,
234 pub commit_cids: rustc_hash::FxHashSet<String>,
235}
236
237impl ObjectIndex {
238 pub fn lookup(&self, cid_str: &str) -> IndexedObject<'_> {
240 if self.commit_cids.contains(cid_str) {
241 return IndexedObject::Commit;
242 }
243 if let Some(idx) = self.metadata_to_commit.get(cid_str) {
244 return IndexedObject::Metadata(idx);
245 }
246 if let Some(idx) = self.manifest_to_commit.get(cid_str) {
247 return IndexedObject::Manifest(idx);
248 }
249 if let Some(idx) = self.repo_manifest_to_commit.get(cid_str) {
250 return IndexedObject::RepoManifest(idx);
251 }
252 if let Some((idx, wrapped_key)) = self.shard_to_commit.get(cid_str) {
253 return IndexedObject::Shard(idx, wrapped_key.as_ref());
254 }
255 IndexedObject::Unknown
256 }
257}
258
259pub fn build_index(ctx: &VoidContext, store: &FsStore, max_commits: usize) -> Result<ObjectIndex> {
267 let mut index = ObjectIndex::default();
268 let mut visited: rustc_hash::FxHashSet<String> = rustc_hash::FxHashSet::default();
269 let mut queue: VecDeque<VoidCid> = VecDeque::new();
270
271 if let Ok(Some(head_cid)) = ctx.resolve_head() {
273 if let Ok(cid) = void_cid::from_bytes(head_cid.as_bytes()) {
274 queue.push_back(cid);
275 }
276 }
277
278 let refs_dir = ctx.paths.void_dir.join("refs/heads");
280 if refs_dir.exists() {
281 if let Ok(entries) = fs::read_dir(&refs_dir) {
282 for entry in entries.flatten() {
283 if let Ok(content) = fs::read_to_string(entry.path()) {
284 if let Ok(branch_cid) = void_cid::parse(content.trim()) {
285 queue.push_back(branch_cid);
286 }
287 }
288 }
289 }
290 }
291
292 let mut commits_processed = 0;
293
294 while let Some(commit_cid) = queue.pop_front() {
295 let commit_cid_str = commit_cid.to_string();
296
297 if visited.contains(&commit_cid_str) {
298 continue;
299 }
300 visited.insert(commit_cid_str.clone());
301
302 if commits_processed >= max_commits {
303 continue;
304 }
305 commits_processed += 1;
306
307 index.commit_cids.insert(commit_cid_str.clone());
308
309 let (commit, bundle, _reader) = match ctx.load_commit_with_metadata(store, &commit_cid) {
311 Ok(r) => r,
312 Err(_) => {
313 if let Ok((commit, _)) = ctx.load_commit(store, &commit_cid) {
315 queue_parents(&commit, &mut queue);
316 }
317 continue;
318 }
319 };
320
321 let metadata_cid_str = commit.metadata_bundle.to_void_cid()
322 .map(|c| c.to_string())
323 .unwrap_or_default();
324
325 let mut shard_cids = Vec::new();
327 for range in &bundle.shard_map.ranges {
328 if let Some(ref shard_cid_typed) = range.cid {
329 if let Ok(shard_cid) = void_cid::from_bytes(shard_cid_typed.as_bytes()) {
330 shard_cids.push((shard_cid.to_string(), range.wrapped_key.clone()));
331 }
332 }
333 }
334
335 let commit_index = CommitIndex {
336 commit_cid: commit_cid_str,
337 message: commit.message.clone(),
338 timestamp: commit.timestamp,
339 metadata_cid: metadata_cid_str.clone(),
340 shard_cids: shard_cids.clone(),
341 };
342
343 index.metadata_to_commit.insert(metadata_cid_str, commit_index.clone());
344
345 for (shard_cid_str, wrapped_key) in shard_cids {
346 index.shard_to_commit.insert(shard_cid_str, (commit_index.clone(), wrapped_key));
347 }
348
349 if let Some(ref manifest_cid_bytes) = commit.manifest_cid {
350 if let Ok(manifest_cid) = manifest_cid_bytes.to_void_cid() {
351 index.manifest_to_commit.insert(manifest_cid.to_string(), commit_index.clone());
352 }
353 }
354
355 if let Some(ref rm_cid_bytes) = commit.repo_manifest_cid {
356 if let Ok(rm_cid) = rm_cid_bytes.to_void_cid() {
357 index.repo_manifest_to_commit.insert(rm_cid.to_string(), commit_index.clone());
358 }
359 }
360
361 queue_parents(&commit, &mut queue);
362 }
363
364 Ok(index)
365}
366
367pub fn list_all_objects(ctx: &VoidContext) -> Vec<String> {
373 let objects_dir = ctx.paths.void_dir.join("objects");
374 let mut cids = Vec::new();
375
376 let Ok(prefixes) = fs::read_dir(&objects_dir) else {
377 return cids;
378 };
379
380 for prefix_entry in prefixes.flatten() {
381 let prefix_path = prefix_entry.path();
382 if !prefix_path.is_dir() {
383 continue;
384 }
385
386 let Ok(objects) = fs::read_dir(&prefix_path) else {
387 continue;
388 };
389
390 for obj_entry in objects.flatten() {
391 if let Some(name) = obj_entry.file_name().to_str() {
392 if !name.ends_with(".tmp") {
393 cids.push(name.to_string());
394 }
395 }
396 }
397 }
398
399 cids.sort();
400 cids
401}
402
403pub fn categorize_object(store: &FsStore, index: &ObjectIndex, cid_str: &str) -> ObjectInfo {
405 let cid = match void_cid::parse(cid_str) {
406 Ok(c) => c,
407 Err(_) => {
408 return ObjectInfo {
409 cid: cid_str.to_string(),
410 object_type: ObjectType::Unknown,
411 format: Format::Unknown,
412 encrypted_size: 0,
413 };
414 }
415 };
416
417 let object_type = match index.lookup(cid_str) {
418 IndexedObject::Commit => ObjectType::Commit,
419 IndexedObject::Metadata(_) => ObjectType::Metadata,
420 IndexedObject::Manifest(_) => ObjectType::Manifest,
421 IndexedObject::RepoManifest(_) => ObjectType::RepoManifest,
422 IndexedObject::Shard(_, _) => ObjectType::Shard,
423 IndexedObject::Unknown => ObjectType::Unknown,
424 };
425
426 let encrypted_size = store.get_blob::<EncryptedShard>(&cid)
427 .map(|b| b.as_bytes().len())
428 .unwrap_or(0);
429
430 ObjectInfo {
431 cid: cid_str.to_string(),
432 object_type,
433 format: Format::Unknown,
434 encrypted_size,
435 }
436}
437
438pub fn audit_object_indexed(
444 ctx: &VoidContext,
445 store: &FsStore,
446 index: &ObjectIndex,
447 cid_str: &str,
448) -> AuditResult {
449 let cid = match void_cid::parse(cid_str) {
450 Ok(c) => c,
451 Err(e) => return AuditResult::Error(format!("Invalid CID: {e}")),
452 };
453
454 match index.lookup(cid_str) {
455 IndexedObject::Commit => {
456 match ctx.load_commit(store, &cid) {
457 Ok((commit, _)) => audit_commit(&commit),
458 Err(e) => AuditResult::Error(format!("Failed to decrypt commit: {e}")),
459 }
460 }
461 IndexedObject::Metadata(commit_idx) => {
462 audit_metadata_indexed(ctx, store, commit_idx)
463 }
464 IndexedObject::Manifest(commit_idx) => {
465 audit_manifest_indexed(ctx, store, commit_idx)
466 }
467 IndexedObject::RepoManifest(commit_idx) => {
468 let size = store.get_blob::<EncryptedShard>(&cid)
469 .map(|b| b.as_bytes().len())
470 .unwrap_or(0);
471 AuditResult::RepoManifest(RepoManifestAudit {
472 encrypted_size: size,
473 parent_commit: Some(parent_info(commit_idx)),
474 })
475 }
476 IndexedObject::Shard(commit_idx, wrapped_key) => {
477 let encrypted: EncryptedShard = match store.get_blob(&cid) {
478 Ok(d) => d,
479 Err(e) => return AuditResult::Error(format!("Object not found: {e}")),
480 };
481 audit_shard_idxed(ctx, store, &encrypted, wrapped_key, commit_idx, &cid)
482 }
483 IndexedObject::Unknown => {
484 AuditResult::Error("Object not found in commit history - may be orphaned".to_string())
485 }
486 }
487}
488
489fn audit_commit(commit: &Commit) -> AuditResult {
491 let parent_cid = commit.parents.first().and_then(|p| {
492 p.to_void_cid().map(|c| c.to_string()).ok()
493 });
494
495 let metadata_cid = commit.metadata_bundle.to_void_cid()
496 .map(|c| c.to_string())
497 .unwrap_or_else(|_| hex::encode(commit.metadata_bundle.as_bytes()));
498
499 AuditResult::Commit(CommitAudit {
500 message: commit.message.clone(),
501 timestamp: commit.timestamp,
502 parent_cid,
503 metadata_cid,
504 is_signed: commit.is_signed(),
505 author: commit.author.map(|a| a.to_hex()),
506 })
507}
508
509fn audit_metadata_indexed(
511 ctx: &VoidContext,
512 store: &FsStore,
513 commit_idx: &CommitIndex,
514) -> AuditResult {
515 let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
516 Ok(c) => c,
517 Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
518 };
519
520 let (_, metadata, _) = match ctx.load_commit_with_metadata(store, &commit_cid) {
521 Ok(r) => r,
522 Err(e) => return AuditResult::Error(format!("Failed to load commit+metadata: {e}")),
523 };
524
525 let shards = extract_shard_refs(&metadata);
526
527 AuditResult::Metadata(MetadataAudit {
528 version: metadata.version,
529 range_count: metadata.shard_map.ranges.len(),
530 shards,
531 parent_commit: Some(parent_info(commit_idx)),
532 })
533}
534
535fn audit_manifest_indexed(
537 ctx: &VoidContext,
538 store: &FsStore,
539 commit_idx: &CommitIndex,
540) -> AuditResult {
541 let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
542 Ok(c) => c,
543 Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
544 };
545
546 let (commit, reader) = match ctx.load_commit(store, &commit_cid) {
547 Ok(r) => r,
548 Err(e) => return AuditResult::Error(format!("Failed to load commit: {e}")),
549 };
550
551 let manifest = match ctx.load_manifest(store, &commit, &reader) {
552 Ok(Some(m)) => m,
553 Ok(None) => return AuditResult::Error("Commit has no manifest".to_string()),
554 Err(e) => return AuditResult::Error(format!("Failed to load manifest: {e}")),
555 };
556
557 let files: Vec<ManifestFileEntry> = manifest.iter()
559 .filter_map(|r| r.ok())
560 .map(|e| ManifestFileEntry {
561 path: e.path.clone(),
562 size: e.size,
563 lines: e.lines,
564 shard_index: e.shard_index,
565 })
566 .collect();
567
568 let groups = manifest.entries_by_shard().unwrap_or_default();
570 let shards: Vec<ManifestShardInfo> = manifest.shards().iter().enumerate()
571 .map(|(i, s)| {
572 let cid_str = void_cid::from_bytes(s.cid.as_bytes())
573 .map(|c| c.to_string())
574 .unwrap_or_else(|_| hex::encode(s.cid.as_bytes()));
575 ManifestShardInfo {
576 cid: cid_str,
577 size_compressed: s.size_compressed,
578 size_decompressed: s.size_decompressed,
579 file_count: groups.get(i).map(|g| g.len()).unwrap_or(0),
580 }
581 })
582 .collect();
583
584 AuditResult::Manifest(ManifestAudit {
585 file_count: manifest.total_files(),
586 total_bytes: manifest.total_bytes(),
587 shard_count: manifest.shards().len(),
588 files,
589 shards,
590 parent_commit: Some(parent_info(commit_idx)),
591 })
592}
593
594fn audit_shard_idxed(
596 ctx: &VoidContext,
597 store: &FsStore,
598 encrypted: &EncryptedShard,
599 wrapped_key: Option<&WrappedKey>,
600 commit_idx: &CommitIndex,
601 shard_cid: &VoidCid,
602) -> AuditResult {
603 let commit_cid = match void_cid::parse(&commit_idx.commit_cid) {
604 Ok(c) => c,
605 Err(e) => return AuditResult::Error(format!("Invalid commit CID: {e}")),
606 };
607
608 let (commit, reader) = match ctx.load_commit(store, &commit_cid) {
609 Ok(r) => r,
610 Err(e) => return AuditResult::Error(format!("Failed to load commit: {e}")),
611 };
612
613 let manifest = match ctx.load_manifest(store, &commit, &reader) {
615 Ok(Some(m)) => m,
616 Ok(None) => return AuditResult::Error("Commit has no manifest".to_string()),
617 Err(e) => return AuditResult::Error(format!("Failed to load manifest: {e}")),
618 };
619
620 let shard_cid_bytes = shard_cid.to_bytes();
622 let shard_idx = manifest.shards().iter().position(|s| s.cid.as_bytes() == &shard_cid_bytes);
623
624 let groups = match manifest.entries_by_shard() {
625 Ok(g) => g,
626 Err(e) => return AuditResult::Error(format!("Failed to group entries: {e}")),
627 };
628
629 let manifest_entries = shard_idx
630 .and_then(|idx| groups.get(idx))
631 .cloned()
632 .unwrap_or_default();
633
634 let ancestor_keys = collect_ancestor_content_keys_vault(&ctx.crypto.vault, store, &commit);
636 let decrypted = match reader.decrypt_shard(encrypted, wrapped_key, &ancestor_keys) {
637 Ok(d) => d,
638 Err(e) => return AuditResult::Error(format!("Failed to decrypt shard: {e}")),
639 };
640
641 let body = match decrypted.decompress() {
642 Ok(b) => b,
643 Err(e) => return AuditResult::Error(format!("Invalid shard: {e}")),
644 };
645
646 let shard_ref = shard_idx.and_then(|idx| manifest.shards().get(idx));
647
648 let entries: Vec<ShardEntry> = manifest_entries
649 .iter()
650 .map(|e| ShardEntry {
651 path: e.path.clone(),
652 size: e.length,
653 lines: e.lines,
654 })
655 .collect();
656
657 AuditResult::Shard(ShardAudit {
658 version: 0, entry_count: entries.len() as u32,
660 dir_count: 0,
661 body_compressed: shard_ref.map(|s| s.size_compressed as u32).unwrap_or(0),
662 body_decompressed: body.len() as u32,
663 entries,
664 parent_commit: Some(parent_info(commit_idx)),
665 })
666}
667
668fn queue_parents(commit: &Commit, queue: &mut VecDeque<VoidCid>) {
674 for parent_bytes in &commit.parents {
675 if !parent_bytes.as_bytes().is_empty() {
676 if let Ok(parent_cid) = parent_bytes.to_void_cid() {
677 queue.push_back(parent_cid);
678 }
679 }
680 }
681}
682
683fn parent_info(idx: &CommitIndex) -> ParentCommitInfo {
685 ParentCommitInfo {
686 cid: idx.commit_cid.clone(),
687 message: idx.message.clone(),
688 timestamp: idx.timestamp,
689 }
690}
691
692fn extract_shard_refs(bundle: &MetadataBundle) -> Vec<ShardRef> {
694 bundle
695 .shard_map
696 .ranges
697 .iter()
698 .filter_map(|r| {
699 r.cid.as_ref().map(|shard_cid_typed| {
700 let cid_str = void_cid::from_bytes(shard_cid_typed.as_bytes())
701 .map(|c| c.to_string())
702 .unwrap_or_else(|_| hex::encode(shard_cid_typed.as_bytes()));
703 ShardRef {
704 shard_id: r.shard_id as u32,
705 cid: cid_str,
706 }
707 })
708 })
709 .collect()
710}