1use std::{collections::HashSet, fs, path::PathBuf};
26
27use anyhow::{Context, Result};
28use chrono::Utc;
29use objects::{
30 fs_atomic::write_file_atomic,
31 object::{ChangeId, ContentHash, Principal, Redaction, RedactionsBlob, Tree},
32};
33
34use crate::repository::Repository;
35
36#[derive(Debug, Clone)]
39pub struct PurgeOutcome {
40 pub redaction_id: Option<ContentHash>,
45 pub redactions_marked: usize,
49 pub blob_bytes_removed: bool,
53 pub blob_remains_in_pack: bool,
58}
59
60impl Repository {
61 pub fn put_redaction(&self, redaction: Redaction) -> Result<ContentHash> {
67 let blob = redaction.redacted_blob;
68 let mut existing = self.get_redactions_for_blob(&blob)?;
69
70 let id = redaction_content_hash(&redaction)?;
73
74 for existing_redaction in &existing.redactions {
77 let existing_id = redaction_content_hash(existing_redaction)?;
78 if existing_id == id {
79 return Ok(id);
80 }
81 }
82
83 existing.push(redaction);
84 let bytes = existing
85 .encode()
86 .with_context(|| "encoding redactions blob")?;
87 let path = self.redaction_path_for_blob(&blob);
88 if let Some(parent) = path.parent() {
89 fs::create_dir_all(parent).with_context(|| format!("create '{}'", parent.display()))?;
90 }
91 write_file_atomic(&path, &bytes).with_context(|| format!("write '{}'", path.display()))?;
92 Ok(id)
93 }
94
95 pub fn get_redactions_for_blob(&self, blob: &ContentHash) -> Result<RedactionsBlob> {
99 let path = self.redaction_path_for_blob(blob);
100 if !path.exists() {
101 return Ok(RedactionsBlob::empty());
102 }
103 let bytes = fs::read(&path).with_context(|| format!("read '{}'", path.display()))?;
104 RedactionsBlob::decode(&bytes).with_context(|| format!("decode '{}'", path.display()))
105 }
106
107 pub fn list_all_redactions(&self) -> Result<Vec<(ContentHash, RedactionsBlob)>> {
111 let dir = self.redactions_dir();
112 if !dir.exists() {
113 return Ok(Vec::new());
114 }
115 let mut out = Vec::new();
116 for entry in fs::read_dir(&dir).with_context(|| format!("read '{}'", dir.display()))? {
117 let entry = entry.with_context(|| format!("entry in '{}'", dir.display()))?;
118 let path = entry.path();
119 let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
120 continue;
121 };
122 if path.extension().and_then(|e| e.to_str()) != Some("bin") {
126 continue;
127 }
128 let Ok(blob) = parse_blob_hash_hex(stem) else {
129 continue;
130 };
131 let bytes = fs::read(&path).with_context(|| format!("read '{}'", path.display()))?;
132 let blob_obj = RedactionsBlob::decode(&bytes)
133 .with_context(|| format!("decode '{}'", path.display()))?;
134 out.push((blob, blob_obj));
135 }
136 Ok(out)
137 }
138
139 pub fn get_redaction(
148 &self,
149 redaction_id: &ContentHash,
150 ) -> Result<Option<(ContentHash, Redaction)>> {
151 for (blob, redactions_blob) in self.list_all_redactions()? {
152 for redaction in &redactions_blob.redactions {
153 let id = redaction_content_hash(redaction)?;
154 if id == *redaction_id {
155 return Ok(Some((blob, redaction.clone())));
156 }
157 }
158 }
159 Ok(None)
160 }
161
162 pub fn purge_blob(&self, blob: &ContentHash, _purger: &Principal) -> Result<PurgeOutcome> {
174 let mut redactions_blob = self.get_redactions_for_blob(blob)?;
175 if redactions_blob.redactions.is_empty() {
176 anyhow::bail!(
177 "no redaction exists for blob {} — declare one with `heddle redact` first",
178 blob.short()
179 );
180 }
181 let now = Utc::now();
182 let redactions_marked = redactions_blob.mark_all_purged(now);
183 let latest_id = match redactions_blob.latest() {
184 Some(latest) => Some(redaction_content_hash(latest)?),
185 None => None,
186 };
187 let bytes = redactions_blob
191 .encode()
192 .with_context(|| "re-encode redactions blob after purge mark")?;
193 let path = self.redaction_path_for_blob(blob);
194 write_file_atomic(&path, &bytes).with_context(|| format!("write '{}'", path.display()))?;
195
196 let (blob_bytes_removed, blob_remains_in_pack) = remove_loose_blob_bytes(self, blob)?;
201
202 Ok(PurgeOutcome {
203 redaction_id: latest_id,
204 redactions_marked,
205 blob_bytes_removed,
206 blob_remains_in_pack,
207 })
208 }
209
210 pub(crate) fn redactions_dir(&self) -> PathBuf {
212 self.heddle_dir().join("redactions")
213 }
214
215 pub(crate) fn redaction_path_for_blob(&self, blob: &ContentHash) -> PathBuf {
218 self.redactions_dir()
219 .join(format!("{}.bin", hex_encode_content_hash(blob)))
220 }
221
222 pub fn redaction_stub_for_blob(&self, blob: &ContentHash) -> Result<Option<String>> {
231 let redactions = self.get_redactions_for_blob(blob)?;
232 if !redactions.has_active() {
233 return Ok(None);
234 }
235 let latest = redactions
236 .latest()
237 .expect("non-empty redactions blob has a latest entry");
238 let id = redaction_content_hash(latest)?;
239 Ok(Some(latest.stub_text(&id)))
240 }
241
242 pub fn reachable_states(&self) -> Result<Vec<ChangeId>> {
253 let refs = self.refs();
254 let mut roots: Vec<ChangeId> = Vec::new();
255 for name in refs
256 .list_threads()
257 .with_context(|| "list threads for reachable_states")?
258 {
259 if let Some(tip) = refs
260 .get_thread(&name)
261 .with_context(|| format!("read thread '{name}'"))?
262 {
263 roots.push(tip);
264 }
265 }
266 for name in refs
267 .list_markers()
268 .with_context(|| "list markers for reachable_states")?
269 {
270 if let Some(tip) = refs
271 .get_marker(&name)
272 .with_context(|| format!("read marker '{name}'"))?
273 {
274 roots.push(tip);
275 }
276 }
277
278 let mut visited: HashSet<ChangeId> = HashSet::new();
279 let mut queue: Vec<ChangeId> = Vec::new();
280 for root in roots {
281 if visited.insert(root) {
282 queue.push(root);
283 }
284 }
285 let mut out: Vec<ChangeId> = Vec::new();
286 while let Some(id) = queue.pop() {
287 let Some(state) = self
290 .store()
291 .get_state(&id)
292 .with_context(|| format!("load state {} for reachable walk", id.short()))?
293 else {
294 continue;
295 };
296 out.push(id);
297 for parent in &state.parents {
298 if visited.insert(*parent) {
299 queue.push(*parent);
300 }
301 }
302 }
303 Ok(out)
304 }
305
306 pub fn paths_to_blob_in_state(
314 &self,
315 state: &ChangeId,
316 target: &ContentHash,
317 ) -> Result<Vec<String>> {
318 let Some(tree) = self
319 .get_tree_for_state(state)
320 .with_context(|| format!("load tree for state {}", state.short()))?
321 else {
322 return Ok(Vec::new());
323 };
324 let mut out: Vec<String> = Vec::new();
325 walk_tree_for_blob(self, &tree, "", target, &mut out)?;
326 Ok(out)
327 }
328}
329
330fn walk_tree_for_blob(
335 repo: &Repository,
336 tree: &Tree,
337 prefix: &str,
338 target: &ContentHash,
339 out: &mut Vec<String>,
340) -> Result<()> {
341 for entry in tree.iter() {
342 let path = if prefix.is_empty() {
343 entry.name.clone()
344 } else {
345 format!("{prefix}/{}", entry.name)
346 };
347 if entry.is_blob() {
348 if entry.hash == *target {
349 out.push(path);
350 }
351 continue;
352 }
353 if entry.is_tree() {
354 let Some(subtree) = repo
355 .store()
356 .get_tree(&entry.hash)
357 .with_context(|| format!("load subtree {}", entry.hash.short()))?
358 else {
359 continue;
361 };
362 walk_tree_for_blob(repo, &subtree, &path, target, out)?;
363 }
364 }
365 Ok(())
366}
367
368fn redaction_content_hash(redaction: &Redaction) -> Result<ContentHash> {
373 let single = RedactionsBlob::new(vec![redaction.clone()]);
377 let bytes = single
378 .encode()
379 .with_context(|| "encode single-redaction for content addressing")?;
380 let digest = blake3::hash(&bytes);
381 Ok(ContentHash::from_bytes(*digest.as_bytes()))
382}
383
384fn hex_encode_content_hash(hash: &ContentHash) -> String {
385 let bytes = hash.as_bytes();
386 let mut out = String::with_capacity(bytes.len() * 2);
387 for b in bytes {
388 use std::fmt::Write as _;
389 let _ = write!(out, "{:02x}", b);
390 }
391 out
392}
393
394fn parse_blob_hash_hex(hex: &str) -> Result<ContentHash> {
395 if hex.len() != 64 {
396 anyhow::bail!("invalid blob-hash hex length: {}", hex.len());
397 }
398 let mut bytes = [0u8; 32];
399 for i in 0..32 {
400 let slice = &hex[i * 2..i * 2 + 2];
401 bytes[i] = u8::from_str_radix(slice, 16)
402 .with_context(|| format!("invalid hex byte at offset {}", i * 2))?;
403 }
404 Ok(ContentHash::from_bytes(bytes))
405}
406
407fn remove_loose_blob_bytes(repo: &Repository, hash: &ContentHash) -> Result<(bool, bool)> {
411 let store = repo.store();
412 if let Some(path) = store.loose_blob_path(hash)
413 && path.exists()
414 {
415 fs::remove_file(&path)
416 .with_context(|| format!("remove loose blob '{}'", path.display()))?;
417 return Ok((true, false));
423 }
424 Ok((false, false))
425}
426
#[cfg(test)]
mod tests {
    use chrono::TimeZone;
    use objects::object::{ChangeId, Principal};
    use tempfile::TempDir;

    use super::*;

    /// Initialises a default repository inside a fresh temporary directory.
    /// The `TempDir` guard is returned so the directory outlives the test body.
    fn fresh_repo() -> (TempDir, Repository) {
        let tmp = TempDir::new().unwrap();
        let repository = Repository::init_default(tmp.path()).unwrap();
        (tmp, repository)
    }

    /// Fixed principal used as redactor and purger in these tests.
    fn sample_principal() -> Principal {
        Principal {
            name: "Anan".into(),
            email: "anan@heddle.sh".into(),
        }
    }

    /// Fixed blob hash that every sample redaction targets.
    fn sample_blob() -> ContentHash {
        ContentHash::from_bytes([7u8; 32])
    }

    /// Unsigned, unpurged redaction of `sample_blob` with a fixed timestamp.
    fn sample_redaction() -> Redaction {
        let redacted_at = Utc.with_ymd_and_hms(2026, 5, 10, 14, 33, 0).unwrap();
        Redaction {
            redacted_blob: sample_blob(),
            state: ChangeId::from_bytes([1u8; 16]),
            path: "config/secrets.toml".into(),
            reason: "leaked credential".into(),
            redactor: sample_principal(),
            redacted_at,
            signature: None,
            purged_at: None,
            supersedes: None,
        }
    }

    #[test]
    fn put_redaction_writes_blob_and_returns_stable_id() {
        let (_dir, repo) = fresh_repo();
        let redaction = sample_redaction();

        let first_id = repo.put_redaction(redaction.clone()).expect("put redaction");
        let second_id = repo.put_redaction(redaction).expect("re-put redaction");
        assert_eq!(
            first_id, second_id,
            "put_redaction must be idempotent on identical input"
        );

        let stored = repo
            .get_redactions_for_blob(&sample_blob())
            .expect("get redactions");
        assert_eq!(
            stored.redactions.len(),
            1,
            "idempotent put must not duplicate entries"
        );
    }

    #[test]
    fn list_all_redactions_returns_every_blob_with_a_record() {
        let (_dir, repo) = fresh_repo();
        repo.put_redaction(sample_redaction()).unwrap();

        let listing = repo.list_all_redactions().expect("list all redactions");
        assert_eq!(listing.len(), 1);

        let (blob, records) = &listing[0];
        assert_eq!(*blob, sample_blob());
        assert_eq!(records.redactions.len(), 1);
    }

    #[test]
    fn get_redaction_finds_by_id_or_returns_none() {
        let (_dir, repo) = fresh_repo();
        let id = repo.put_redaction(sample_redaction()).unwrap();

        let (owner, _redaction) = repo
            .get_redaction(&id)
            .expect("lookup by id")
            .expect("present");
        assert_eq!(owner, sample_blob());

        let unknown = ContentHash::from_bytes([0u8; 32]);
        let missing = repo.get_redaction(&unknown).expect("lookup miss");
        assert!(
            missing.is_none(),
            "lookup of unknown id must return None, not error"
        );
    }

    #[test]
    fn purge_blob_refuses_when_no_redaction_exists() {
        let (_dir, repo) = fresh_repo();

        let msg = repo
            .purge_blob(&sample_blob(), &sample_principal())
            .expect_err("purge without redaction must refuse")
            .to_string();
        assert!(
            msg.contains("no redaction"),
            "error must name the missing-redaction precondition, got: {msg}"
        );
    }

    #[test]
    fn purge_blob_marks_redactions_purged_after_redact() {
        let (_dir, repo) = fresh_repo();
        let blob = sample_blob();
        let purger = sample_principal();
        repo.put_redaction(sample_redaction()).unwrap();

        let outcome = repo.purge_blob(&blob, &purger).expect("purge after redact");
        assert_eq!(outcome.redactions_marked, 1);
        assert!(outcome.redaction_id.is_some());

        let stored = repo.get_redactions_for_blob(&blob).expect("get redactions");
        assert!(
            stored.redactions.iter().all(|r| r.is_purged()),
            "every redaction on a purged blob must be marked purged"
        );

        // A second purge finds nothing left to mark.
        let again = repo.purge_blob(&blob, &purger).expect("re-purge");
        assert_eq!(again.redactions_marked, 0);
    }
}