Skip to main content

objects/object/
redaction.rs

// SPDX-License-Identifier: Apache-2.0
//! Redaction — a declaration that a blob in a state is sensitive and must
//! materialize as a stub instead of its content.
//!
//! Redaction is *additive*: it is recorded as a new object that supersedes
//! reads of the original. The blob's bytes stay on disk until `heddle purge`
//! explicitly removes them; the redaction itself is the readers' contract
//! that those bytes are no longer accessible through the materialize path.
//!
//! Distinct from review signatures and state signatures:
//! - [`StateSignature`](crate::object::StateSignature) authenticates a state's authorship.
//! - [`ReviewSignature`](crate::object::ReviewSignature) authenticates that a state was reviewed.
//! - [`Redaction`] is itself a signable operation — it claims that a specific
//!   blob in a specific state should no longer materialize. The signature
//!   binds operator → declaration so audits can trace who hid what, and when.
16
17use chrono::{DateTime, Utc};
18use serde::{Deserialize, Serialize};
19
20use crate::object::{ChangeId, ContentHash, Principal, StateSignature};
21
/// Stable byte prefix the signing payload begins with.
///
/// [`Redaction::canonical_signing_payload`] writes these bytes first, ahead
/// of any field data; the trailing NUL is part of the tag. Bumping this
/// versions the payload format itself; old signatures with the old prefix
/// continue to verify exactly as they did when written.
pub const REDACTION_SIGNING_PAYLOAD_VERSION_TAG: &[u8] = b"hd-redact-v1\x00";
26
/// A redaction declaration on a single blob in a single state.
///
/// NOTE(review): the declaration order of these fields looks load-bearing
/// for the on-disk encoding — this file's tests describe rmp-serde's default
/// as deriving serialization from struct-field declaration order. Prefer
/// appending new fields (with `#[serde(default)]`) over reordering; confirm
/// against the rmp-serde documentation before changing the layout.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Redaction {
    /// The blob whose bytes should no longer materialize.
    pub redacted_blob: ContentHash,
    /// The state in which the path resides. A redaction is *scoped* to the
    /// (blob, state, path) triple; `--all-states` produces one redaction
    /// per matching state.
    pub state: ChangeId,
    /// Path within the state's tree where the blob lives.
    pub path: String,
    /// Operator-supplied reason ("leaked credential", "PII", ...). Free-form
    /// text; it is covered by the signing payload and echoed in the stub.
    pub reason: String,
    /// Who declared the redaction. Both the name and the email are covered
    /// by the signing payload.
    pub redactor: Principal,
    /// When the redaction was declared. RFC3339 string at the wire format
    /// boundary; `DateTime<Utc>` internally.
    pub redacted_at: DateTime<Utc>,
    /// Optional cryptographic signature over the canonical signing payload
    /// (see [`Redaction::canonical_signing_payload`]). `None` for unsigned
    /// redactions (still recorded in the oplog, still surfaced in
    /// materialize, but reviewers will see them flagged unsigned).
    /// Deserializes to `None` when absent from the encoded form.
    #[serde(default)]
    pub signature: Option<StateSignature>,
    /// When `heddle purge` removed the underlying blob bytes. `None` while
    /// the redaction is declared-but-bytes-still-on-disk. Not part of the
    /// signing payload, so it can be set after signing.
    #[serde(default)]
    pub purged_at: Option<DateTime<Utc>>,
    /// The redaction this one supersedes, if any — for chains where the
    /// reason or scope was updated. Identified by the prior redaction's
    /// content hash. When present it is appended to the signing payload.
    #[serde(default)]
    pub supersedes: Option<ContentHash>,
}
61
62impl Redaction {
63    /// Build the canonical bytes a signer covers. Anything outside this
64    /// payload (e.g. `purged_at`, `signature` itself) is intentionally
65    /// excluded — purges happen after signing, and the signature can't sign
66    /// itself.
67    pub fn canonical_signing_payload(&self) -> Vec<u8> {
68        let mut buf = Vec::with_capacity(256);
69        buf.extend_from_slice(REDACTION_SIGNING_PAYLOAD_VERSION_TAG);
70        buf.extend_from_slice(self.redacted_blob.as_bytes());
71        buf.extend_from_slice(self.state.as_bytes());
72        buf.extend_from_slice(self.path.as_bytes());
73        buf.push(0);
74        buf.extend_from_slice(self.reason.as_bytes());
75        buf.push(0);
76        buf.extend_from_slice(self.redactor.name.as_bytes());
77        buf.push(0);
78        buf.extend_from_slice(self.redactor.email.as_bytes());
79        buf.push(0);
80        buf.extend_from_slice(self.redacted_at.to_rfc3339().as_bytes());
81        if let Some(supersedes) = &self.supersedes {
82            buf.extend_from_slice(supersedes.as_bytes());
83        }
84        buf
85    }
86
87    /// Mark the redaction as purged. Returns `true` if the state changed
88    /// (`false` if already purged — callers can use this for idempotency).
89    pub fn mark_purged(&mut self, at: DateTime<Utc>) -> bool {
90        if self.purged_at.is_some() {
91            false
92        } else {
93            self.purged_at = Some(at);
94            true
95        }
96    }
97
98    /// Whether the blob bytes are gone from local storage.
99    pub fn is_purged(&self) -> bool {
100        self.purged_at.is_some()
101    }
102
103    /// Format the stub a reader sees instead of the redacted blob content.
104    /// Plain text, ASCII-only, safe to embed in materialized worktrees and
105    /// downstream Git exports.
106    pub fn stub_text(&self, redaction_id: &ContentHash) -> String {
107        let mut out = String::with_capacity(256);
108        out.push_str("# This file was redacted by Heddle.\n");
109        out.push_str(&format!(
110            "# redacted-at: {}\n",
111            self.redacted_at.to_rfc3339()
112        ));
113        out.push_str(&format!(
114            "# redactor:    {} <{}>\n",
115            self.redactor.name, self.redactor.email
116        ));
117        out.push_str(&format!("# reason:      {}\n", self.reason));
118        out.push_str(&format!("# redaction:   {}\n", redaction_id.short()));
119        if let Some(purged_at) = self.purged_at {
120            out.push_str(&format!("# purged-at:   {}\n", purged_at.to_rfc3339()));
121            out.push_str("# The original bytes have been purged from local storage.\n");
122        } else {
123            out.push_str("# The original bytes remain on disk pending purge.\n");
124        }
125        out
126    }
127}
128
/// On-disk blob containing all redactions for a single blob hash. One file
/// per redacted blob, encoded with `rmp-serde` — matches the
/// [`ReviewSignaturesBlob`](crate::object::ReviewSignaturesBlob) pattern.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct RedactionsBlob {
    /// Version of the encoded layout. [`RedactionsBlob::new`] stamps it with
    /// [`RedactionsBlob::FORMAT_VERSION`].
    pub format_version: u8,
    /// Every redaction recorded for the blob, in the order they were added
    /// ([`RedactionsBlob::push`] appends).
    pub redactions: Vec<Redaction>,
}
137
138impl RedactionsBlob {
139    pub const FORMAT_VERSION: u8 = 1;
140
141    pub fn new(redactions: Vec<Redaction>) -> Self {
142        Self {
143            format_version: Self::FORMAT_VERSION,
144            redactions,
145        }
146    }
147
148    pub fn empty() -> Self {
149        Self::new(Vec::new())
150    }
151
152    pub fn encode(&self) -> Result<Vec<u8>, RedactionError> {
153        rmp_serde::to_vec(self).map_err(|err| RedactionError::Encoding(err.to_string()))
154    }
155
156    pub fn decode(bytes: &[u8]) -> Result<Self, RedactionError> {
157        rmp_serde::from_slice(bytes).map_err(|err| RedactionError::Decoding(err.to_string()))
158    }
159
160    pub fn push(&mut self, redaction: Redaction) {
161        self.redactions.push(redaction);
162    }
163
164    /// `true` iff any redaction in this blob is non-superseded — i.e. the
165    /// reader should see the stub. Today every redaction is active; a
166    /// future "unredact" verb would skip the superseded ones.
167    pub fn has_active(&self) -> bool {
168        !self.redactions.is_empty()
169    }
170
171    /// The most recent redaction, by `redacted_at`. Used as the canonical
172    /// stub source when multiple redactions exist for the same blob (e.g.
173    /// because of `--all-states` plus a later refinement).
174    pub fn latest(&self) -> Option<&Redaction> {
175        self.redactions.iter().max_by_key(|r| r.redacted_at)
176    }
177
178    /// Mark every redaction in this blob as purged. Returns the count that
179    /// actually transitioned (others were already purged).
180    pub fn mark_all_purged(&mut self, at: DateTime<Utc>) -> usize {
181        let mut transitioned = 0;
182        for redaction in &mut self.redactions {
183            if redaction.mark_purged(at) {
184                transitioned += 1;
185            }
186        }
187        transitioned
188    }
189}
190
/// Errors produced while encoding/decoding redactions. Each variant carries
/// the underlying serializer's message as a plain string.
#[derive(Debug, thiserror::Error)]
pub enum RedactionError {
    /// Serializing a [`RedactionsBlob`] failed (see [`RedactionsBlob::encode`]).
    #[error("encoding redaction: {0}")]
    Encoding(String),
    /// Deserializing a [`RedactionsBlob`] failed (see [`RedactionsBlob::decode`]).
    #[error("decoding redaction: {0}")]
    Decoding(String),
}
199
#[cfg(test)]
mod tests {
    use chrono::TimeZone;

    use super::*;

    /// Fixed redactor identity shared by every fixture.
    fn principal() -> Principal {
        Principal {
            name: "Grace Hopper".into(),
            email: "grace@example.com".into(),
        }
    }

    /// Fixed 32-byte hash; doubles as blob hash and as a stand-in redaction id.
    fn blob_hash() -> ContentHash {
        ContentHash::from_bytes([7u8; 32])
    }

    /// Midnight UTC on the given day of May 2026.
    fn may_2026_midnight(day: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(2026, 5, day, 0, 0, 0).unwrap()
    }

    /// Unsigned, unpurged redaction of `blob` declared at a fixed instant.
    fn redaction(blob: ContentHash, reason: &str) -> Redaction {
        let declared_at = Utc.with_ymd_and_hms(2026, 5, 10, 14, 33, 0).unwrap();
        Redaction {
            redacted_blob: blob,
            state: ChangeId::from_bytes([1u8; 16]),
            path: String::from("config/secrets.toml"),
            reason: reason.to_owned(),
            redactor: principal(),
            redacted_at: declared_at,
            signature: None,
            purged_at: None,
            supersedes: None,
        }
    }

    #[test]
    fn round_trips_through_msgpack() {
        let original = RedactionsBlob::new(vec![redaction(blob_hash(), "leaked credential")]);
        let wire = original.encode().expect("encode");
        let decoded = RedactionsBlob::decode(&wire).expect("decode");
        assert_eq!(decoded, original);
        // The format version is load-bearing: future readers branch on it.
        assert_eq!(decoded.format_version, RedactionsBlob::FORMAT_VERSION);
    }

    #[test]
    fn canonical_payload_stable_across_field_reordering() {
        // The signing payload concatenates fields in a fixed, hand-written
        // order. Were it derived from struct-field declaration order alone
        // (rmp-serde's default), reordering the struct would silently
        // invalidate every existing signature. The explicit
        // `canonical_signing_payload` is the contract; this test pins it.
        let payload = redaction(blob_hash(), "leaked credential").canonical_signing_payload();
        // The tag comes first and gives us a versioned signing domain.
        assert!(payload.starts_with(REDACTION_SIGNING_PAYLOAD_VERSION_TAG));
        let payload_text = String::from_utf8_lossy(&payload);
        // Reason and path are covered — otherwise an operator could re-sign
        // a redaction with a different reason. The RFC3339 timestamp is
        // covered too: fixed timezone, fixed precision, so the payload is
        // reproducible across runs.
        for covered in [
            "leaked credential",
            "config/secrets.toml",
            "2026-05-10T14:33:00+00:00",
        ] {
            assert!(payload_text.contains(covered));
        }
    }

    #[test]
    fn mark_purged_is_idempotent_and_observable() {
        let mut subject = redaction(blob_hash(), "leaked credential");
        let first_purge = may_2026_midnight(11);
        assert!(!subject.is_purged());
        assert!(subject.mark_purged(first_purge));
        assert!(subject.is_purged());
        // A second call is a no-op — operators can safely retry purge
        // without distorting the `purged_at` audit trail.
        let retry = may_2026_midnight(12);
        assert!(!subject.mark_purged(retry));
        assert_eq!(subject.purged_at, Some(first_purge));
    }

    #[test]
    fn stub_text_mentions_redactor_reason_and_purge_state() {
        let pending = redaction(blob_hash(), "leaked credential");
        let pending_stub = pending.stub_text(&blob_hash());
        // The stub is the ONLY thing readers see for redacted files. It
        // must carry every field a reviewer would want: who, when, why,
        // and — pre-purge — an explicit statement that the bytes remain.
        for expected in [
            "Grace Hopper",
            "grace@example.com",
            "leaked credential",
            "# redacted-at:",
            "# redaction:",
            "remain on disk pending purge",
        ] {
            assert!(pending_stub.contains(expected));
        }

        let mut purged = pending.clone();
        purged.mark_purged(may_2026_midnight(11));
        let purged_stub = purged.stub_text(&blob_hash());
        for expected in ["# purged-at:", "purged from local storage"] {
            assert!(purged_stub.contains(expected));
        }
    }

    #[test]
    fn latest_picks_the_most_recent() {
        let early = redaction(blob_hash(), "first pass");
        let mut late = redaction(blob_hash(), "tighter scope");
        late.redacted_at = Utc.with_ymd_and_hms(2026, 5, 12, 9, 0, 0).unwrap();
        let blob = RedactionsBlob::new(vec![early, late.clone()]);
        assert_eq!(blob.latest().unwrap(), &late);
    }
}
310
#[cfg(test)]
mod proptests {
    //! Property tests for the redaction primitive's data model.
    //!
    //! These match the build brief's "Property tests" acceptance
    //! criteria (`.agents/redaction-primitive.md`):
    //!
    //!   1. Encode → decode round-trips losslessly for any well-formed
    //!      redaction.
    //!   2. `canonical_signing_payload` is deterministic: a clone, a
    //!      field-by-field rebuild, and a later purge all yield the same
    //!      bytes — the contract that lets signatures verify.
    //!   3. `mark_purged` is idempotent: replaying the call with any
    //!      later timestamp does not move `purged_at`.
    //!   4. `stub_text` always carries the redaction id, the reason,
    //!      and the redactor email, no matter what content went in.
    //!
    //! Running with the standard proptest budget produces ~256 cases
    //! per property by default. Checks that take no generated input are
    //! plain `#[test]`s so they run once instead of once per case.
    use proptest::prelude::*;

    use super::*;

    fn arb_principal() -> impl Strategy<Value = Principal> {
        // Names + emails are ASCII-printable, length-bounded. We're
        // not testing unicode tolerance here — the redaction store's
        // contract is "whatever the principal source serves us" and
        // we want determinism, not exhaustive locale coverage.
        let name = "[A-Za-z][A-Za-z0-9 _-]{0,30}";
        let email = "[a-z][a-z0-9_-]{0,15}@[a-z0-9.-]{1,30}\\.[a-z]{2,4}";
        (name, email).prop_map(|(name, email)| Principal { name, email })
    }

    fn arb_blob_hash() -> impl Strategy<Value = ContentHash> {
        any::<[u8; 32]>().prop_map(ContentHash::from_bytes)
    }

    fn arb_change_id() -> impl Strategy<Value = ChangeId> {
        any::<[u8; 16]>().prop_map(ChangeId::from_bytes)
    }

    fn arb_redaction() -> impl Strategy<Value = Redaction> {
        // Timestamp range is bounded to keep RFC3339 formatting stable
        // (chrono's print is fine, but the test outputs are easier to
        // diff with a narrow window). Year 2000–2100 is plenty.
        let secs = 946_684_800i64..4_102_444_800i64;
        (
            arb_blob_hash(),
            arb_change_id(),
            "[A-Za-z0-9._/-]{1,40}",
            "[A-Za-z0-9 ._:'-]{0,80}",
            arb_principal(),
            secs,
            prop::option::of(arb_blob_hash()),
        )
            .prop_map(|(blob, state, path, reason, redactor, secs, supersedes)| {
                Redaction {
                    redacted_blob: blob,
                    state,
                    path,
                    reason,
                    redactor,
                    redacted_at: chrono::DateTime::<Utc>::from_timestamp(secs, 0)
                        .expect("in-range timestamp"),
                    signature: None,
                    purged_at: None,
                    supersedes,
                }
            })
    }

    /// Empty `RedactionsBlob` is consistent: `has_active` returns
    /// `false`, and `latest` returns `None`. The materialize path
    /// uses these to decide whether to render a stub — if either
    /// regressed, redacted files would silently materialize their
    /// real bytes.
    ///
    /// There is no input to vary, so this is a plain unit test rather
    /// than a property that would re-run the identical check per case.
    #[test]
    fn empty_blob_is_inert() {
        let blob = RedactionsBlob::empty();
        assert!(!blob.has_active());
        assert!(blob.latest().is_none());
    }

    proptest! {
        /// Encode → decode round-trips. If this breaks, the on-disk
        /// redaction store can't be read back; the leaked-secret stays
        /// secret only by accident.
        #[test]
        fn encode_decode_roundtrip(r in arb_redaction()) {
            let blob = RedactionsBlob::new(vec![r.clone()]);
            let bytes = blob.encode().expect("encode");
            let decoded = RedactionsBlob::decode(&bytes).expect("decode");
            prop_assert_eq!(decoded.redactions.len(), 1);
            prop_assert_eq!(&decoded.redactions[0], &r);
        }

        /// Canonical signing payload is a pure function of the
        /// redaction's *signed content*: cloning the value or rebuilding
        /// it from the same fields must give bit-identical bytes, and
        /// recording a purge afterwards must not change them. This is
        /// what makes a signature stable across read cycles.
        #[test]
        fn canonical_payload_is_deterministic(r in arb_redaction()) {
            let payload = r.canonical_signing_payload();
            prop_assert_eq!(&payload, &r.clone().canonical_signing_payload());

            // Rebuild from the individual fields, listed in a different
            // order than the struct declares them.
            let Redaction {
                redacted_blob,
                state,
                path,
                reason,
                redactor,
                redacted_at,
                supersedes,
                ..
            } = r.clone();
            let rebuilt = Redaction {
                supersedes,
                redacted_at,
                redactor,
                reason,
                path,
                state,
                redacted_blob,
                purged_at: None,
                signature: None,
            };
            prop_assert_eq!(&payload, &rebuilt.canonical_signing_payload());

            // `purged_at` is outside the payload: purges happen after
            // signing and must not invalidate the signature.
            let mut purged = r.clone();
            purged.mark_purged(r.redacted_at);
            prop_assert_eq!(&payload, &purged.canonical_signing_payload());
        }

        /// `purged_at` is write-once. Once a redaction is purged, a
        /// later `mark_purged` call with any timestamp must NOT move
        /// the field — operators can re-run the purge command (or
        /// retries can ride a partial failure) without distorting the
        /// audit trail.
        #[test]
        fn mark_purged_is_idempotent(
            mut r in arb_redaction(),
            t1_secs in 946_684_800i64..4_000_000_000i64,
            t2_offset in 0i64..1_000_000_000i64,
        ) {
            let t1 = chrono::DateTime::<Utc>::from_timestamp(t1_secs, 0).unwrap();
            let t2 = chrono::DateTime::<Utc>::from_timestamp(t1_secs + t2_offset, 0).unwrap();
            prop_assert!(r.mark_purged(t1));
            prop_assert!(r.is_purged());
            prop_assert_eq!(r.purged_at, Some(t1));
            // Second purge with a later timestamp is a no-op.
            prop_assert!(!r.mark_purged(t2));
            prop_assert_eq!(r.purged_at, Some(t1));
        }

        /// The stub a reader sees must always identify the redaction.
        /// If the stub failed to carry the id or the reason, downstream
        /// auditors would have no way to trace why a file disappeared.
        #[test]
        fn stub_always_carries_id_and_reason(r in arb_redaction()) {
            let id = ContentHash::from_bytes([0xAB; 32]);
            let stub = r.stub_text(&id);
            // The short id is what `heddle redact show` displays;
            // the stub must echo it for back-reference.
            prop_assert!(
                stub.contains(&id.short()),
                "stub must contain redaction id; got: {stub}"
            );
            // Empty reasons are allowed (defensive) but if any reason
            // text is supplied it must surface in the stub.
            if !r.reason.is_empty() {
                prop_assert!(
                    stub.contains(&r.reason),
                    "stub must carry reason '{}'; got: {stub}",
                    r.reason
                );
            }
            // The redactor's email is the durable identifier — the
            // name might be a display label, but the email survives
            // rename and is what auditors trace back to.
            prop_assert!(
                stub.contains(&r.redactor.email),
                "stub must carry redactor email '{}'; got: {stub}",
                r.redactor.email
            );
        }

        /// Adding redactions makes the blob active. Pin: a single
        /// non-purged redaction is sufficient — readers must see the
        /// stub from the moment the first declaration lands.
        #[test]
        fn single_redaction_makes_blob_active(r in arb_redaction()) {
            let blob = RedactionsBlob::new(vec![r]);
            prop_assert!(blob.has_active());
            prop_assert!(blob.latest().is_some());
        }
    }
}