1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
// SPDX-License-Identifier: Apache-2.0
//! Garbage collection command - clean up unreachable objects.
//!
//! **Redaction tombstones are structurally permanent.** They live in
//! `<heddle_dir>/redactions/<blob-hex>.bin`, *outside* the `objects/`
//! subtree GC operates on. `pack_objects` only walks loose blobs/trees
//! (see `crates/objects/src/store/fs/fs_pack.rs`); `prune_loose_objects`
//! only drops bytes whose canonical copy now lives in a pack. Neither
//! ever observes or touches a redaction file — they cannot be packed,
//! cannot be pruned, and cannot be lost to a `gc --prune --aggressive`
//! pass. We report the pinned count so the audit trail in `heddle maintenance gc`
//! output makes the invariant visible to operators.
use anyhow::Result;
use objects::store::ObjectStore;
use serde::Serialize;
#[cfg(feature = "git-overlay")]
use crate::bridge::GitBridge;
use crate::cli::{Cli, render::write_json_stdout, should_output_json};
/// Machine-readable summary of one `cmd_gc` run.
///
/// `cmd_gc` fills this in as it goes and, when JSON output is selected,
/// serializes it to stdout via `write_json_stdout`. The struct derives
/// `Default` so `cmd_gc` can set only the identifying fields up front and
/// leave every counter at zero until the corresponding step has run.
#[derive(Serialize, Default)]
struct GcOutput {
/// Output discriminator; `cmd_gc` always sets this to `"gc"`.
output_kind: &'static str,
/// Name of the action performed; `cmd_gc` always sets this to `"gc"`.
action: &'static str,
/// `"ok"` for a real run, `"dry_run"` when nothing was modified.
status: &'static str,
/// Echo of the `dry_run` argument passed to `cmd_gc`.
dry_run: bool,
/// Echo of the `prune` argument passed to `cmd_gc`.
prune: bool,
/// Objects packed by `pack_objects`; on a dry run, the number of loose
/// blobs plus trees that are listed as packing candidates.
packed_count: u64,
/// Byte savings reported by `pack_objects` (stays 0 on a dry run).
bytes_saved: u64,
/// Loose objects removed by `prune_loose_objects` (stays 0 on a dry run).
pruned_loose: u64,
/// Bytes freed as reported by `prune_loose_objects` (stays 0 on a dry run).
bytes_freed: u64,
/// Total redaction entries found before GC, summed over every blob
/// returned by `list_all_redactions`.
pinned_redactions: usize,
/// Set to the `pinned_redactions` value once the post-GC check has passed
/// on a real run; stays 0 on a dry run or when nothing was pinned.
preserved_redactions: usize,
/// Entries removed by `GitBridge::prune_unreachable_mapping_entries`.
#[cfg(feature = "git-overlay")]
pruned_git_mapping_entries: usize,
/// Count returned by `GitBridge::consolidate_mirror` for the Git-overlay
/// mirror.
#[cfg(feature = "git-overlay")]
consolidated_mirror_loose: usize,
}
pub fn cmd_gc(cli: &Cli, prune: bool, aggressive: bool, dry_run: bool) -> Result<()> {
let repo = cli.open_repo()?;
let json = should_output_json(cli, Some(repo.config()));
let mut summary = GcOutput {
output_kind: "gc",
action: "gc",
status: "ok",
dry_run,
prune,
..Default::default()
};
// Snapshot redactions before GC so we can both report the pinned
// count and (post-GC) assert that no record was disturbed. The
// assertion is defence-in-depth: GC structurally cannot reach
// these files, but the audit step costs O(redactions) and gives
// operators a hard guarantee in writing.
let redactions_before = repo.list_all_redactions().unwrap_or_default();
let pinned_redactions: usize = redactions_before
.iter()
.map(|(_, blob)| blob.redactions.len())
.sum();
summary.pinned_redactions = pinned_redactions;
if dry_run {
let blobs = repo.store().list_blobs()?;
let trees = repo.store().list_trees()?;
let total_objects = blobs.len() + trees.len();
summary.packed_count = total_objects as u64;
summary.status = "dry_run";
if !json {
println!(
"Would pack {} objects ({} blobs, {} trees)",
total_objects,
blobs.len(),
trees.len()
);
let _ = prune;
println!("Would prune redundant loose objects after consolidating into a pack");
if pinned_redactions > 0 {
println!(
"Pinned {pinned_redactions} redaction tombstone(s) — never collected by GC"
);
}
}
} else {
let (packed_count, bytes_saved) = repo.store().pack_objects(aggressive)?;
summary.packed_count = packed_count;
summary.bytes_saved = bytes_saved;
if !json {
if packed_count > 0 {
println!(
"Packed {} objects (saved {} bytes)",
packed_count, bytes_saved
);
} else {
println!("No objects to pack");
}
}
repo.refs().pack_refs()?;
#[cfg(feature = "git-overlay")]
{
let mut bridge = GitBridge::new(&repo);
if bridge.is_initialized() {
let removed = bridge.prune_unreachable_mapping_entries()?;
summary.pruned_git_mapping_entries = removed;
if !json && removed > 0 {
println!("Pruned {removed} stale Git-overlay mapping entries");
}
// Consolidate the git-overlay mirror (`.heddle/git`): pack its
// loose objects and drop the redundant loose copies. The mirror
// is a separate object store (sley's git ODB) from heddle's
// native store packed above, and accumulates a loose object per
// minted/imported commit, tree, and blob — the dominant
// uninstrumented read cost. Lossless + OID-preserving (packs
// every object on disk, content-addressed); see
// `GitBridge::consolidate_mirror`.
let consolidated = bridge.consolidate_mirror()?;
summary.consolidated_mirror_loose = consolidated;
if !json && consolidated > 0 {
println!(
"Consolidated {consolidated} loose Git-overlay mirror objects into a pack"
);
}
}
}
// Consolidation prune: drop the loose copies of objects that now
// live in the pack we just wrote. This is intrinsic to what a GC
// *is* — a GC that packs without pruning leaves every object in
// BOTH places, so the object store has strictly more sources to
// search and read commands (status/diff/verification) get slower
// instead of faster. The prune only removes loose objects whose
// canonical copy is now in a pack, so it never loses data
// (fsck stays clean). It therefore runs unconditionally, not
// behind `--prune`. The `prune`/`aggressive` flags are retained
// for callers/scripts but no longer gate this safe step.
let _ = prune;
let (removed, bytes_freed) = repo.store().prune_loose_objects()?;
summary.pruned_loose = removed;
summary.bytes_freed = bytes_freed;
if !json {
if removed > 0 {
println!(
"Pruned {} redundant loose objects (freed {} bytes)",
removed, bytes_freed
);
} else {
println!("No loose objects to prune");
}
}
// Post-GC invariant: every redaction we saw at the start of
// this run must still exist. We compare by (blob_hash,
// redaction_count) — if a record disappeared, GC's structural
// boundary was breached and the next reader would see secrets
// we promised to hide.
let redactions_after = repo.list_all_redactions().unwrap_or_default();
let before_index: std::collections::HashMap<_, _> = redactions_before
.iter()
.map(|(blob, b)| (*blob, b.redactions.len()))
.collect();
for (blob, after_blob) in &redactions_after {
let before_count = before_index.get(blob).copied().unwrap_or(0);
if after_blob.redactions.len() < before_count {
anyhow::bail!(
"GC invariant violated: redactions on blob {} dropped from {} to {} — \
refusing to claim a successful GC",
blob.short(),
before_count,
after_blob.redactions.len()
);
}
}
for (blob, _) in &redactions_before {
if !redactions_after.iter().any(|(b, _)| b == blob) {
anyhow::bail!(
"GC invariant violated: redactions file for blob {} disappeared — \
refusing to claim a successful GC",
blob.short()
);
}
}
if pinned_redactions > 0 {
summary.preserved_redactions = pinned_redactions;
if !json {
println!(
"Preserved {pinned_redactions} redaction tombstone(s) across GC \
(structurally outside the object store)"
);
}
}
}
if json {
write_json_stdout(&summary)?;
}
Ok(())
}