objects/object/redaction.rs
1// SPDX-License-Identifier: Apache-2.0
2//! Redaction — a declaration that a blob in a state is sensitive and must
3//! materialize as a stub instead of its content.
4//!
5//! Redaction is *additive*: a new object that supersedes a read of the
6//! original. The blob's bytes stay on disk until `heddle purge` explicitly
7//! removes them; the redaction itself is the readers' contract that those
8//! bytes are no longer accessible through the materialize path.
9//!
10//! Distinct from review signatures and state signatures:
11//! - [`StateSignature`](crate::object::StateSignature) authenticates a state's authorship.
12//! - [`ReviewSignature`](crate::object::ReviewSignature) authenticates that a state was reviewed.
13//! - [`Redaction`] is itself a signable operation — it claims that a specific
14//! blob in a specific state should no longer materialize. The signature
15//! binds operator → declaration so audits can trace who hid what when.
16
17use chrono::{DateTime, Utc};
18use serde::{Deserialize, Serialize};
19
20use crate::object::{ChangeId, ContentHash, Principal, StateSignature};
21
/// Versioned prefix that opens every redaction signing payload.
///
/// The trailing NUL byte is part of the tag. Bumping the tag versions the
/// payload format itself; signatures written under the old prefix continue
/// to verify exactly as they did when written.
pub const REDACTION_SIGNING_PAYLOAD_VERSION_TAG: &[u8] = b"hd-redact-v1\x00";
26
27/// A redaction declaration on a single blob in a single state.
28#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
29pub struct Redaction {
30 /// The blob whose bytes should no longer materialize.
31 pub redacted_blob: ContentHash,
32 /// The state in which the path resides. A redaction is *scoped* to the
33 /// (blob, state, path) triple; `--all-states` produces one redaction
34 /// per matching state.
35 pub state: ChangeId,
36 /// Path within the state's tree where the blob lives.
37 pub path: String,
38 /// Operator-supplied reason ("leaked credential", "PII", ...).
39 pub reason: String,
40 /// Who declared the redaction.
41 pub redactor: Principal,
42 /// When the redaction was declared. RFC3339 string at the wire format
43 /// boundary; `DateTime<Utc>` internally.
44 pub redacted_at: DateTime<Utc>,
45 /// Optional cryptographic signature over the canonical signing payload
46 /// (see [`canonical_signing_payload`]). `None` for unsigned redactions
47 /// (still recorded in the oplog, still surfaced in materialize, but
48 /// reviewers will see them flagged unsigned).
49 #[serde(default)]
50 pub signature: Option<StateSignature>,
51 /// When `heddle purge` removed the underlying blob bytes. `None` while
52 /// the redaction is declared-but-bytes-still-on-disk.
53 #[serde(default)]
54 pub purged_at: Option<DateTime<Utc>>,
55 /// The redaction this one supersedes, if any — for chains where the
56 /// reason or scope was updated. Identified by the prior redaction's
57 /// content hash.
58 #[serde(default)]
59 pub supersedes: Option<ContentHash>,
60}
61
62impl Redaction {
63 /// Build the canonical bytes a signer covers. Anything outside this
64 /// payload (e.g. `purged_at`, `signature` itself) is intentionally
65 /// excluded — purges happen after signing, and the signature can't sign
66 /// itself.
67 pub fn canonical_signing_payload(&self) -> Vec<u8> {
68 let mut buf = Vec::with_capacity(256);
69 buf.extend_from_slice(REDACTION_SIGNING_PAYLOAD_VERSION_TAG);
70 buf.extend_from_slice(self.redacted_blob.as_bytes());
71 buf.extend_from_slice(self.state.as_bytes());
72 buf.extend_from_slice(self.path.as_bytes());
73 buf.push(0);
74 buf.extend_from_slice(self.reason.as_bytes());
75 buf.push(0);
76 buf.extend_from_slice(self.redactor.name.as_bytes());
77 buf.push(0);
78 buf.extend_from_slice(self.redactor.email.as_bytes());
79 buf.push(0);
80 buf.extend_from_slice(self.redacted_at.to_rfc3339().as_bytes());
81 if let Some(supersedes) = &self.supersedes {
82 buf.extend_from_slice(supersedes.as_bytes());
83 }
84 buf
85 }
86
87 /// Mark the redaction as purged. Returns `true` if the state changed
88 /// (`false` if already purged — callers can use this for idempotency).
89 pub fn mark_purged(&mut self, at: DateTime<Utc>) -> bool {
90 if self.purged_at.is_some() {
91 false
92 } else {
93 self.purged_at = Some(at);
94 true
95 }
96 }
97
98 /// Whether the blob bytes are gone from local storage.
99 pub fn is_purged(&self) -> bool {
100 self.purged_at.is_some()
101 }
102
103 /// Format the stub a reader sees instead of the redacted blob content.
104 /// Plain text, ASCII-only, safe to embed in materialized worktrees and
105 /// downstream Git exports.
106 pub fn stub_text(&self, redaction_id: &ContentHash) -> String {
107 let mut out = String::with_capacity(256);
108 out.push_str("# This file was redacted by Heddle.\n");
109 out.push_str(&format!(
110 "# redacted-at: {}\n",
111 self.redacted_at.to_rfc3339()
112 ));
113 out.push_str(&format!(
114 "# redactor: {} <{}>\n",
115 self.redactor.name, self.redactor.email
116 ));
117 out.push_str(&format!("# reason: {}\n", self.reason));
118 out.push_str(&format!("# redaction: {}\n", redaction_id.short()));
119 if let Some(purged_at) = self.purged_at {
120 out.push_str(&format!("# purged-at: {}\n", purged_at.to_rfc3339()));
121 out.push_str("# The original bytes have been purged from local storage.\n");
122 } else {
123 out.push_str("# The original bytes remain on disk pending purge.\n");
124 }
125 out
126 }
127}
128
129/// On-disk blob containing all redactions for a single blob hash. One file
130/// per redacted blob, encoded with `rmp-serde` — matches the
131/// [`ReviewSignaturesBlob`](crate::object::ReviewSignaturesBlob) pattern.
132#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
133pub struct RedactionsBlob {
134 pub format_version: u8,
135 pub redactions: Vec<Redaction>,
136}
137
138impl RedactionsBlob {
139 pub const FORMAT_VERSION: u8 = 1;
140
141 pub fn new(redactions: Vec<Redaction>) -> Self {
142 Self {
143 format_version: Self::FORMAT_VERSION,
144 redactions,
145 }
146 }
147
148 pub fn empty() -> Self {
149 Self::new(Vec::new())
150 }
151
152 pub fn encode(&self) -> Result<Vec<u8>, RedactionError> {
153 rmp_serde::to_vec(self).map_err(|err| RedactionError::Encoding(err.to_string()))
154 }
155
156 pub fn decode(bytes: &[u8]) -> Result<Self, RedactionError> {
157 rmp_serde::from_slice(bytes).map_err(|err| RedactionError::Decoding(err.to_string()))
158 }
159
160 pub fn push(&mut self, redaction: Redaction) {
161 self.redactions.push(redaction);
162 }
163
164 /// `true` iff any redaction in this blob is non-superseded — i.e. the
165 /// reader should see the stub. Today every redaction is active; a
166 /// future "unredact" verb would skip the superseded ones.
167 pub fn has_active(&self) -> bool {
168 !self.redactions.is_empty()
169 }
170
171 /// The most recent redaction, by `redacted_at`. Used as the canonical
172 /// stub source when multiple redactions exist for the same blob (e.g.
173 /// because of `--all-states` plus a later refinement).
174 pub fn latest(&self) -> Option<&Redaction> {
175 self.redactions.iter().max_by_key(|r| r.redacted_at)
176 }
177
178 /// Mark every redaction in this blob as purged. Returns the count that
179 /// actually transitioned (others were already purged).
180 pub fn mark_all_purged(&mut self, at: DateTime<Utc>) -> usize {
181 let mut transitioned = 0;
182 for redaction in &mut self.redactions {
183 if redaction.mark_purged(at) {
184 transitioned += 1;
185 }
186 }
187 transitioned
188 }
189}
190
191/// Errors produced while encoding/decoding redactions.
192#[derive(Debug, thiserror::Error)]
193pub enum RedactionError {
194 #[error("encoding redaction: {0}")]
195 Encoding(String),
196 #[error("decoding redaction: {0}")]
197 Decoding(String),
198}
199
#[cfg(test)]
mod tests {
    use chrono::TimeZone;

    use super::*;

    /// Fixed redactor identity shared by every fixture.
    fn principal() -> Principal {
        Principal {
            name: "Anan".into(),
            email: "anan@heddle.sh".into(),
        }
    }

    /// Fixed blob hash (32 bytes of `7`).
    fn blob_hash() -> ContentHash {
        ContentHash::from_bytes([7u8; 32])
    }

    /// An unsigned, unpurged, non-superseding redaction of `blob` at
    /// `config/secrets.toml`, declared 2026-05-10T14:33:00Z.
    fn redaction(blob: ContentHash, reason: &str) -> Redaction {
        Redaction {
            redacted_blob: blob,
            state: ChangeId::from_bytes([1u8; 16]),
            path: "config/secrets.toml".into(),
            reason: reason.into(),
            redactor: principal(),
            redacted_at: Utc.with_ymd_and_hms(2026, 5, 10, 14, 33, 0).unwrap(),
            signature: None,
            purged_at: None,
            supersedes: None,
        }
    }

    #[test]
    fn round_trips_through_msgpack() {
        let blob = blob_hash();
        let original = RedactionsBlob::new(vec![redaction(blob, "leaked credential")]);
        let encoded = original.encode().expect("encode");
        let decoded = RedactionsBlob::decode(&encoded).expect("decode");
        assert_eq!(decoded, original);
        // Format-version is load-bearing: future readers branch on it.
        assert_eq!(decoded.format_version, RedactionsBlob::FORMAT_VERSION);
    }

    #[test]
    fn canonical_payload_stable_across_field_reordering() {
        // The signing payload concatenates fields in a fixed order. If we
        // accidentally derived it from struct-field declaration order
        // (rmp-serde's default), reordering the struct would silently
        // invalidate every existing signature. The explicit
        // `canonical_signing_payload` is the contract; this test pins its
        // exact byte layout, so any reordering, dropped delimiter, or
        // timestamp-format drift fails here rather than at verify time.
        let r = redaction(blob_hash(), "leaked credential");
        let payload = r.canonical_signing_payload();

        // Tag prefix at the front; gives us a versioned signing domain.
        assert!(payload.starts_with(REDACTION_SIGNING_PAYLOAD_VERSION_TAG));

        let mut expected = Vec::new();
        expected.extend_from_slice(REDACTION_SIGNING_PAYLOAD_VERSION_TAG);
        expected.extend_from_slice(r.redacted_blob.as_bytes());
        expected.extend_from_slice(r.state.as_bytes());
        // path, reason, redactor name, redactor email — each NUL-terminated.
        expected.extend_from_slice(
            b"config/secrets.toml\0leaked credential\0Anan\0anan@heddle.sh\0",
        );
        // RFC3339 timestamp — fixed timezone, fixed precision, so the
        // payload is reproducible across runs. No `supersedes`, so nothing
        // follows it.
        expected.extend_from_slice(b"2026-05-10T14:33:00+00:00");
        assert_eq!(payload, expected);
    }

    #[test]
    fn canonical_payload_covers_supersedes_but_not_purge() {
        let base = redaction(blob_hash(), "leaked credential");
        let base_payload = base.canonical_signing_payload();

        // Linking to a prior redaction is part of what the signer attests.
        let chained = Redaction {
            supersedes: Some(ContentHash::from_bytes([9u8; 32])),
            ..base.clone()
        };
        assert_ne!(chained.canonical_signing_payload(), base_payload);

        // Purges happen after signing, so they must not disturb the payload.
        let mut purged = base.clone();
        assert!(purged.mark_purged(Utc.with_ymd_and_hms(2026, 5, 11, 0, 0, 0).unwrap()));
        assert_eq!(purged.canonical_signing_payload(), base_payload);
    }

    #[test]
    fn mark_purged_is_idempotent_and_observable() {
        let mut r = redaction(blob_hash(), "leaked credential");
        let at = Utc.with_ymd_and_hms(2026, 5, 11, 0, 0, 0).unwrap();
        assert!(!r.is_purged());
        assert!(r.mark_purged(at));
        assert!(r.is_purged());
        // Second call is a no-op — operators can safely retry purge
        // without distorting the `purged_at` audit trail.
        assert!(!r.mark_purged(Utc.with_ymd_and_hms(2026, 5, 12, 0, 0, 0).unwrap()));
        assert_eq!(r.purged_at, Some(at));
    }

    #[test]
    fn stub_text_mentions_redactor_reason_and_purge_state() {
        let r = redaction(blob_hash(), "leaked credential");
        let stub = r.stub_text(&blob_hash());
        // The stub is the ONLY thing readers see for redacted files. It
        // must carry every field a reviewer would want: who, when, why,
        // and whether the bytes are still recoverable.
        assert!(stub.contains("Anan"));
        assert!(stub.contains("anan@heddle.sh"));
        assert!(stub.contains("leaked credential"));
        assert!(stub.contains("# redacted-at:"));
        assert!(stub.contains("# redaction:"));
        // Pre-purge, the stub should explicitly say bytes remain.
        assert!(stub.contains("remain on disk pending purge"));

        let mut purged = r.clone();
        assert!(purged.mark_purged(Utc.with_ymd_and_hms(2026, 5, 11, 0, 0, 0).unwrap()));
        let purged_stub = purged.stub_text(&blob_hash());
        assert!(purged_stub.contains("# purged-at:"));
        assert!(purged_stub.contains("purged from local storage"));
    }

    #[test]
    fn latest_picks_the_most_recent() {
        let early = redaction(blob_hash(), "first pass");
        let late = Redaction {
            redacted_at: Utc.with_ymd_and_hms(2026, 5, 12, 9, 0, 0).unwrap(),
            ..redaction(blob_hash(), "tighter scope")
        };

        let blob = RedactionsBlob::new(vec![early.clone(), late.clone()]);
        assert_eq!(blob.latest().unwrap(), &late);

        // Selection is by timestamp, not by position in the list.
        let reversed = RedactionsBlob::new(vec![late.clone(), early]);
        assert_eq!(reversed.latest().unwrap(), &late);
    }
}
310
#[cfg(test)]
mod proptests {
    //! Property tests for the redaction primitive's data model.
    //!
    //! These match the build brief's "Property tests" acceptance
    //! criteria (`.agents/redaction-primitive.md`):
    //!
    //! 1. Encode → decode round-trips losslessly for any well-formed
    //!    redaction.
    //! 2. `canonical_signing_payload` is deterministic across clones
    //!    and unaffected by a later purge — the contract that lets
    //!    signatures keep verifying.
    //! 3. `mark_purged` is idempotent: replaying the call with any
    //!    later timestamp does not move `purged_at`.
    //! 4. `stub_text` always carries the redaction id, the reason,
    //!    and the redactor email, no matter what content went in.
    //!
    //! Running with the standard proptest budget produces ~256 cases
    //! per property by default. The empty-blob check at the bottom has
    //! no input to vary, so it is an ordinary unit test.
    use proptest::prelude::*;

    use super::*;

    fn arb_principal() -> impl Strategy<Value = Principal> {
        // Names + emails are ASCII-printable, length-bounded. We're
        // not testing unicode tolerance here — the redaction store's
        // contract is "whatever the principal source serves us" and
        // we want determinism, not exhaustive locale coverage.
        let name = "[A-Za-z][A-Za-z0-9 _-]{0,30}";
        let email = "[a-z][a-z0-9_-]{0,15}@[a-z0-9.-]{1,30}\\.[a-z]{2,4}";
        (name, email).prop_map(|(name, email)| Principal { name, email })
    }

    fn arb_blob_hash() -> impl Strategy<Value = ContentHash> {
        any::<[u8; 32]>().prop_map(ContentHash::from_bytes)
    }

    fn arb_change_id() -> impl Strategy<Value = ChangeId> {
        any::<[u8; 16]>().prop_map(ChangeId::from_bytes)
    }

    /// Unsigned, unpurged redactions with ASCII path/reason text and an
    /// optional `supersedes` hash.
    fn arb_redaction() -> impl Strategy<Value = Redaction> {
        // Timestamp range is bounded to keep RFC3339 formatting stable
        // (chrono's print is fine, but the test outputs are easier to
        // diff with a narrow window). Year 2000–2100 is plenty.
        let secs = 946_684_800i64..4_102_444_800i64;
        (
            arb_blob_hash(),
            arb_change_id(),
            "[A-Za-z0-9._/-]{1,40}",
            "[A-Za-z0-9 ._:'-]{0,80}",
            arb_principal(),
            secs,
            prop::option::of(arb_blob_hash()),
        )
            .prop_map(|(blob, state, path, reason, redactor, secs, supersedes)| {
                Redaction {
                    redacted_blob: blob,
                    state,
                    path,
                    reason,
                    redactor,
                    redacted_at: chrono::DateTime::<Utc>::from_timestamp(secs, 0)
                        .expect("in-range timestamp"),
                    signature: None,
                    purged_at: None,
                    supersedes,
                }
            })
    }

    proptest! {
        /// Encode → decode round-trips. If this breaks, the on-disk
        /// redaction store can't be read back; the leaked-secret stays
        /// secret only by accident.
        #[test]
        fn encode_decode_roundtrip(r in arb_redaction()) {
            let blob = RedactionsBlob::new(vec![r.clone()]);
            let bytes = blob.encode().expect("encode");
            let decoded = RedactionsBlob::decode(&bytes).expect("decode");
            prop_assert_eq!(decoded.redactions.len(), 1);
            prop_assert_eq!(&decoded.redactions[0], &r);
        }

        /// Canonical signing payload is a pure function of the
        /// redaction's *content*: a clone must give bit-identical
        /// bytes. This is what makes a signature stable across read
        /// cycles.
        #[test]
        fn canonical_payload_is_deterministic(r in arb_redaction()) {
            let payload1 = r.canonical_signing_payload();
            let payload2 = r.clone().canonical_signing_payload();
            prop_assert_eq!(payload1, payload2);
        }

        /// Purging happens after signing, so recording `purged_at`
        /// must leave the signed bytes untouched — otherwise every
        /// purge would invalidate the redaction's signature.
        #[test]
        fn canonical_payload_ignores_purge_state(
            r in arb_redaction(),
            purge_secs in 946_684_800i64..4_102_444_800i64,
        ) {
            let before = r.canonical_signing_payload();
            let at = chrono::DateTime::<Utc>::from_timestamp(purge_secs, 0).unwrap();
            let mut purged = r.clone();
            prop_assert!(purged.mark_purged(at));
            prop_assert_eq!(purged.canonical_signing_payload(), before);
        }

        /// `purged_at` is monotonic. Once a redaction is purged, a
        /// later `mark_purged` call with any timestamp must NOT move
        /// the field — operators can re-run the purge command (or
        /// retries can ride a partial failure) without distorting the
        /// audit trail.
        #[test]
        fn mark_purged_is_idempotent(
            mut r in arb_redaction(),
            t1_secs in 946_684_800i64..4_000_000_000i64,
            t2_offset in 0i64..1_000_000_000i64,
        ) {
            let t1 = chrono::DateTime::<Utc>::from_timestamp(t1_secs, 0).unwrap();
            let t2 = chrono::DateTime::<Utc>::from_timestamp(t1_secs + t2_offset, 0).unwrap();
            prop_assert!(r.mark_purged(t1));
            prop_assert!(r.is_purged());
            prop_assert_eq!(r.purged_at, Some(t1));
            // Second purge with a later timestamp is a no-op.
            prop_assert!(!r.mark_purged(t2));
            prop_assert_eq!(r.purged_at, Some(t1));
        }

        /// The stub a reader sees must always identify the redaction.
        /// If the stub failed to carry the id or the reason, downstream
        /// auditors would have no way to trace why a file disappeared.
        #[test]
        fn stub_always_carries_id_and_reason(r in arb_redaction()) {
            let id = ContentHash::from_bytes([0xAB; 32]);
            let stub = r.stub_text(&id);
            // The short id is what `heddle redact show` displays;
            // the stub must echo it for back-reference.
            prop_assert!(
                stub.contains(&id.short()),
                "stub must contain redaction id; got: {stub}"
            );
            // Empty reasons are allowed (defensive) but if any reason
            // text is supplied it must surface in the stub.
            if !r.reason.is_empty() {
                prop_assert!(
                    stub.contains(&r.reason),
                    "stub must carry reason '{}'; got: {stub}",
                    r.reason
                );
            }
            // The redactor's email is the durable identifier — the
            // name might be a display label, but the email survives
            // rename and is what auditors trace back to.
            prop_assert!(
                stub.contains(&r.redactor.email),
                "stub must carry redactor email '{}'; got: {stub}",
                r.redactor.email
            );
        }

        /// Adding redactions makes the blob active. Pin: a single
        /// non-purged redaction is sufficient — readers must see the
        /// stub from the moment the first declaration lands.
        #[test]
        fn single_redaction_makes_blob_active(r in arb_redaction()) {
            let blob = RedactionsBlob::new(vec![r]);
            prop_assert!(blob.has_active());
            prop_assert!(blob.latest().is_some());
        }
    }

    /// Empty `RedactionsBlob` is consistent: `has_active` returns
    /// `false`, and `latest` returns `None`. The materialize path
    /// uses these to decide whether to render a stub — if either
    /// regressed, redacted files would silently materialize their
    /// real bytes.
    ///
    /// Nothing varies between runs, so this is a plain unit test rather
    /// than a property over a throwaway input.
    #[test]
    fn empty_blob_is_inert() {
        let blob = RedactionsBlob::empty();
        assert!(!blob.has_active());
        assert!(blob.latest().is_none());
    }
}