use anyhow::Result;
use shiplog_ports::Redactor;
use shiplog_schema::event::EventEnvelope;
use shiplog_schema::workstream::WorkstreamsFile;
use std::path::{Path, PathBuf};
mod alias;
mod policy;
mod profile;
mod projector;
mod repo;
use alias::DeterministicAliasStore;
use projector::{project_events_with_aliases, project_workstreams_with_aliases};
pub use alias::CACHE_FILENAME;
pub use profile::RedactionProfile;
pub struct DeterministicRedactor {
aliases: DeterministicAliasStore,
}
impl DeterministicRedactor {
pub fn new(key: impl AsRef<[u8]>) -> Self {
Self {
aliases: DeterministicAliasStore::new(key),
}
}
pub fn cache_path(out_dir: &Path) -> PathBuf {
DeterministicAliasStore::cache_path(out_dir)
}
pub fn load_cache(&self, path: &Path) -> Result<()> {
self.aliases.load_cache(path)
}
pub fn save_cache(&self, path: &Path) -> Result<()> {
self.aliases.save_cache(path)
}
fn alias(&self, kind: &str, value: &str) -> String {
self.aliases.alias(kind, value)
}
}
impl Redactor for DeterministicRedactor {
fn redact_events(&self, events: &[EventEnvelope], profile: &str) -> Result<Vec<EventEnvelope>> {
let aliases = |kind: &str, value: &str| self.alias(kind, value);
Ok(project_events_with_aliases(events, profile, &aliases))
}
fn redact_workstreams(
&self,
workstreams: &WorkstreamsFile,
profile: &str,
) -> Result<WorkstreamsFile> {
let aliases = |kind: &str, value: &str| self.alias(kind, value);
Ok(project_workstreams_with_aliases(
workstreams,
profile,
&aliases,
))
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use proptest::prelude::*;
use shiplog_ids::EventId;
use shiplog_schema::event::*;
use shiplog_schema::workstream::Workstream;
proptest! {
#[test]
fn aliases_are_stable_for_same_key(kind in "repo|ws", value in ".*") {
let r = DeterministicRedactor::new(b"test-key");
let a1 = r.alias(&kind, &value);
let a2 = r.alias(&kind, &value);
prop_assert_eq!(a1, a2);
}
}
#[test]
fn public_profile_strips_titles_and_links() {
let r = DeterministicRedactor::new(b"k");
let ev = EventEnvelope {
id: EventId::from_parts(["x", "1"]),
kind: EventKind::PullRequest,
occurred_at: Utc::now(),
actor: Actor {
login: "a".into(),
id: None,
},
repo: RepoRef {
full_name: "o/r".into(),
html_url: Some("https://github.com/o/r".into()),
visibility: RepoVisibility::Private,
},
payload: EventPayload::PullRequest(PullRequestEvent {
number: 1,
title: "secret pr title".into(),
state: PullRequestState::Merged,
created_at: Utc::now(),
merged_at: Some(Utc::now()),
additions: Some(1),
deletions: Some(1),
changed_files: Some(1),
touched_paths_hint: vec!["secret/path".into()],
window: None,
}),
tags: vec![],
links: vec![Link {
label: "pr".into(),
url: "https://github.com/o/r/pull/1".into(),
}],
source: SourceRef {
system: SourceSystem::Github,
url: Some("https://api.github.com/...".into()),
opaque_id: None,
},
};
let out = r.redact_events(&[ev], "public").unwrap();
match &out[0].payload {
EventPayload::PullRequest(pr) => {
assert_eq!(pr.title, "[redacted]");
assert!(pr.touched_paths_hint.is_empty());
}
_ => panic!("expected pr"),
}
assert!(out[0].links.is_empty());
assert!(out[0].source.url.is_none());
assert_ne!(out[0].repo.full_name, "o/r");
}
#[test]
fn public_redaction_no_leak_pr_title() {
let r = DeterministicRedactor::new(b"test-key");
let sensitive_title = "Secret Feature: Internal Auth Bypass";
let ev = EventEnvelope {
id: EventId::from_parts(["x", "1"]),
kind: EventKind::PullRequest,
occurred_at: Utc::now(),
actor: Actor {
login: "a".into(),
id: None,
},
repo: RepoRef {
full_name: "o/r".into(),
html_url: None,
visibility: RepoVisibility::Private,
},
payload: EventPayload::PullRequest(PullRequestEvent {
number: 1,
title: sensitive_title.into(),
state: PullRequestState::Merged,
created_at: Utc::now(),
merged_at: Some(Utc::now()),
additions: Some(10),
deletions: Some(5),
changed_files: Some(2),
touched_paths_hint: vec![],
window: None,
}),
tags: vec![],
links: vec![],
source: SourceRef {
system: SourceSystem::Github,
url: None,
opaque_id: None,
},
};
let out = r.redact_events(&[ev], "public").unwrap();
let json = serde_json::to_string(&out).unwrap();
assert!(
!json.contains(sensitive_title),
"Sensitive PR title leaked in JSON output"
);
assert!(
!json.contains("Auth Bypass"),
"Partial sensitive content leaked"
);
}
#[test]
fn public_redaction_no_leak_repo_name() {
let r = DeterministicRedactor::new(b"test-key");
let sensitive_repo = "acme-corp/top-secret-project";
let ev = EventEnvelope {
id: EventId::from_parts(["x", "1"]),
kind: EventKind::PullRequest,
occurred_at: Utc::now(),
actor: Actor {
login: "a".into(),
id: None,
},
repo: RepoRef {
full_name: sensitive_repo.into(),
html_url: None,
visibility: RepoVisibility::Private,
},
payload: EventPayload::PullRequest(PullRequestEvent {
number: 1,
title: "test".into(),
state: PullRequestState::Merged,
created_at: Utc::now(),
merged_at: Some(Utc::now()),
additions: Some(1),
deletions: Some(1),
changed_files: Some(1),
touched_paths_hint: vec![],
window: None,
}),
tags: vec![],
links: vec![],
source: SourceRef {
system: SourceSystem::Github,
url: None,
opaque_id: None,
},
};
let out = r.redact_events(&[ev], "public").unwrap();
let json = serde_json::to_string(&out).unwrap();
assert!(
!json.contains(sensitive_repo),
"Sensitive repo name leaked in JSON output"
);
assert!(
!json.contains("acme-corp"),
"Org name leaked in JSON output"
);
assert!(
!json.contains("top-secret"),
"Project name leaked in JSON output"
);
}
#[test]
fn public_redaction_no_leak_manual_content() {
use chrono::NaiveDate;
let r = DeterministicRedactor::new(b"test-key");
let sensitive_title = "Security Incident: Data Breach Response";
let sensitive_desc = "Customer PII was exposed in production logs";
let sensitive_impact = "Affected 10,000 user records";
let ev = EventEnvelope {
id: EventId::from_parts(["x", "1"]),
kind: EventKind::Manual,
occurred_at: Utc::now(),
actor: Actor {
login: "a".into(),
id: None,
},
repo: RepoRef {
full_name: "o/r".into(),
html_url: None,
visibility: RepoVisibility::Private,
},
payload: EventPayload::Manual(ManualEvent {
event_type: ManualEventType::Incident,
title: sensitive_title.into(),
description: Some(sensitive_desc.into()),
impact: Some(sensitive_impact.into()),
started_at: Some(NaiveDate::from_ymd_opt(2024, 1, 15).unwrap()),
ended_at: Some(NaiveDate::from_ymd_opt(2024, 1, 15).unwrap()),
}),
tags: vec![],
links: vec![],
source: SourceRef {
system: SourceSystem::Manual,
url: None,
opaque_id: None,
},
};
let out = r.redact_events(&[ev], "public").unwrap();
let json = serde_json::to_string(&out).unwrap();
assert!(
!json.contains(sensitive_title),
"Sensitive manual event title leaked"
);
assert!(
!json.contains(sensitive_desc),
"Sensitive manual event description leaked"
);
assert!(
!json.contains(sensitive_impact),
"Sensitive manual event impact leaked"
);
assert!(
!json.contains("Data Breach"),
"Partial sensitive content leaked"
);
assert!(!json.contains("PII"), "Sensitive abbreviation leaked");
}
#[test]
fn public_redaction_strips_all_urls() {
let r = DeterministicRedactor::new(b"test-key");
let urls = vec![
"https://github.com/acme-corp/secret/pull/42",
"https://api.github.com/repos/acme-corp/secret/issues/1",
"https://jira.internal.company.com/SECRET-123",
"https://docs.google.com/document/d/secret-doc-id",
];
for url in urls {
let ev = EventEnvelope {
id: EventId::from_parts(["x", "1"]),
kind: EventKind::PullRequest,
occurred_at: Utc::now(),
actor: Actor {
login: "a".into(),
id: None,
},
repo: RepoRef {
full_name: "o/r".into(),
html_url: Some(url.into()),
visibility: RepoVisibility::Private,
},
payload: EventPayload::PullRequest(PullRequestEvent {
number: 1,
title: "test".into(),
state: PullRequestState::Merged,
created_at: Utc::now(),
merged_at: Some(Utc::now()),
additions: Some(1),
deletions: Some(1),
changed_files: Some(1),
touched_paths_hint: vec![],
window: None,
}),
tags: vec![],
links: vec![Link {
label: "link".into(),
url: url.into(),
}],
source: SourceRef {
system: SourceSystem::Github,
url: Some(url.into()),
opaque_id: None,
},
};
let out = r.redact_events(&[ev], "public").unwrap();
let json = serde_json::to_string(&out).unwrap();
assert!(
!json.contains("github.com/acme-corp"),
"GitHub URL leaked: {}",
url
);
assert!(!json.contains("jira.internal"), "Jira URL leaked: {}", url);
assert!(
!json.contains("docs.google.com"),
"Google Docs URL leaked: {}",
url
);
assert!(!json.contains("http"), "HTTP prefix leaked in: {}", url);
}
}
#[test]
fn workstream_redaction_no_leak() {
use shiplog_ids::WorkstreamId;
use shiplog_schema::workstream::WorkstreamStats;
let r = DeterministicRedactor::new(b"test-key");
let ws = Workstream {
id: WorkstreamId::from_parts(["ws", "test"]),
title: "Secret Project: Quantum Encryption".into(),
summary: Some(
"Developing military-grade encryption for classified communications".into(),
),
tags: vec!["security".into(), "classified".into(), "repo".into()],
stats: WorkstreamStats::zero(),
events: vec![],
receipts: vec![],
};
let ws_file = WorkstreamsFile {
workstreams: vec![ws],
version: 1,
generated_at: Utc::now(),
};
let out = r.redact_workstreams(&ws_file, "public").unwrap();
let json = serde_json::to_string(&out).unwrap();
assert!(
!json.contains("Quantum Encryption"),
"Workstream title leaked"
);
assert!(
!json.contains("military-grade"),
"Workstream summary leaked"
);
assert!(
!json.contains("Developing"),
"Workstream description leaked"
);
let ws_out = &out.workstreams[0];
assert!(
!ws_out.tags.contains(&"repo".into()),
"repo tag should be filtered"
);
assert!(
ws_out.tags.contains(&"security".into()),
"security tag should remain"
);
assert!(
ws_out.tags.contains(&"classified".into()),
"classified tag should remain (only 'repo' is filtered)"
);
}
#[test]
fn internal_profile_preserves_all_data() {
let r = DeterministicRedactor::new(b"test-key");
let sensitive_title = "Secret Feature Title";
let sensitive_repo = "secret-org/secret-repo";
let ev = EventEnvelope {
id: EventId::from_parts(["x", "1"]),
kind: EventKind::PullRequest,
occurred_at: Utc::now(),
actor: Actor {
login: "a".into(),
id: None,
},
repo: RepoRef {
full_name: sensitive_repo.into(),
html_url: Some("https://github.com/secret".into()),
visibility: RepoVisibility::Private,
},
payload: EventPayload::PullRequest(PullRequestEvent {
number: 1,
title: sensitive_title.into(),
state: PullRequestState::Merged,
created_at: Utc::now(),
merged_at: Some(Utc::now()),
additions: Some(1),
deletions: Some(1),
changed_files: Some(1),
touched_paths_hint: vec!["secret/path".into()],
window: None,
}),
tags: vec![],
links: vec![Link {
label: "pr".into(),
url: "https://github.com/secret".into(),
}],
source: SourceRef {
system: SourceSystem::Github,
url: Some("https://api.github.com/secret".into()),
opaque_id: None,
},
};
let out = r.redact_events(&[ev], "internal").unwrap();
let json = serde_json::to_string(&out).unwrap();
assert!(
json.contains(sensitive_title),
"Internal profile should preserve title"
);
assert!(
json.contains(sensitive_repo),
"Internal profile should preserve repo"
);
assert!(
json.contains("https://github.com/secret"),
"Internal profile should preserve URLs"
);
}
#[test]
fn manager_profile_keeps_context_but_strips_details() {
let r = DeterministicRedactor::new(b"test-key");
let pr_title = "Feature: Add user authentication".to_string();
let ev = EventEnvelope {
id: EventId::from_parts(["x", "1"]),
kind: EventKind::PullRequest,
occurred_at: Utc::now(),
actor: Actor {
login: "a".into(),
id: None,
},
repo: RepoRef {
full_name: "myorg/auth-service".into(),
html_url: Some("https://github.com/myorg/auth-service".into()),
visibility: RepoVisibility::Private,
},
payload: EventPayload::PullRequest(PullRequestEvent {
number: 42,
title: pr_title.clone(),
state: PullRequestState::Merged,
created_at: Utc::now(),
merged_at: Some(Utc::now()),
additions: Some(100),
deletions: Some(50),
changed_files: Some(5),
touched_paths_hint: vec!["src/auth/internal.rs".into(), "src/secrets.rs".into()],
window: None,
}),
tags: vec![],
links: vec![Link {
label: "pr".into(),
url: "https://github.com/myorg/auth-service/pull/42".into(),
}],
source: SourceRef {
system: SourceSystem::Github,
url: Some("https://api.github.com/...".into()),
opaque_id: None,
},
};
let out = r.redact_events(&[ev], "manager").unwrap();
match &out[0].payload {
EventPayload::PullRequest(pr) => {
assert_eq!(pr.title, pr_title);
assert!(
pr.touched_paths_hint.is_empty(),
"touched_paths_hint should be cleared in manager view"
);
}
_ => panic!("expected pr"),
}
assert!(
out[0].links.is_empty(),
"links should be stripped in manager view"
);
assert_eq!(out[0].repo.full_name, "myorg/auth-service");
assert!(out[0].source.url.is_some());
}
#[test]
fn manager_profile_handles_manual_events() {
use chrono::NaiveDate;
let r = DeterministicRedactor::new(b"test-key");
let ev = EventEnvelope {
id: EventId::from_parts(["x", "1"]),
kind: EventKind::Manual,
occurred_at: Utc::now(),
actor: Actor {
login: "a".into(),
id: None,
},
repo: RepoRef {
full_name: "o/r".into(),
html_url: None,
visibility: RepoVisibility::Private,
},
payload: EventPayload::Manual(ManualEvent {
event_type: ManualEventType::Incident,
title: "Database outage resolution".into(),
description: Some("Detailed technical description of the fix".into()),
impact: Some("Affected 1000 users for 5 minutes".into()),
started_at: Some(NaiveDate::from_ymd_opt(2024, 1, 15).unwrap()),
ended_at: Some(NaiveDate::from_ymd_opt(2024, 1, 15).unwrap()),
}),
tags: vec![],
links: vec![Link {
label: "runbook".into(),
url: "https://wiki.internal/runbook".into(),
}],
source: SourceRef {
system: SourceSystem::Manual,
url: None,
opaque_id: None,
},
};
let out = r.redact_events(&[ev], "manager").unwrap();
match &out[0].payload {
EventPayload::Manual(m) => {
assert_eq!(m.title, "Database outage resolution");
assert!(
m.description.is_none(),
"description should be removed in manager view"
);
assert!(
m.impact.is_none(),
"impact should be removed in manager view"
);
}
_ => panic!("expected manual event"),
}
assert!(out[0].links.is_empty());
}
#[test]
fn manager_profile_handles_workstreams() {
use shiplog_ids::WorkstreamId;
use shiplog_schema::workstream::WorkstreamStats;
let r = DeterministicRedactor::new(b"test-key");
let ws = Workstream {
id: WorkstreamId::from_parts(["ws", "test"]),
title: "Authentication Service Improvements".into(),
summary: Some("Internal details about security architecture".into()),
tags: vec!["security".into(), "backend".into(), "repo".into()],
stats: WorkstreamStats::zero(),
events: vec![],
receipts: vec![],
};
let ws_file = WorkstreamsFile {
workstreams: vec![ws],
version: 1,
generated_at: Utc::now(),
};
let out = r.redact_workstreams(&ws_file, "manager").unwrap();
let ws_out = &out.workstreams[0];
assert_eq!(ws_out.title, "Authentication Service Improvements");
assert!(
ws_out.summary.is_none(),
"summary should be removed in manager view"
);
assert!(ws_out.tags.contains(&"security".into()));
assert!(ws_out.tags.contains(&"backend".into()));
assert!(ws_out.tags.contains(&"repo".into()));
}
#[test]
fn cache_round_trip() {
let dir = tempfile::tempdir().unwrap();
let cache_path = dir.path().join("redaction.aliases.json");
let r1 = DeterministicRedactor::new(b"key-a");
let a1 = r1.alias("repo", "acme/foo");
let a2 = r1.alias("ws", "my-workstream");
r1.save_cache(&cache_path).unwrap();
let r2 = DeterministicRedactor::new(b"key-a");
r2.load_cache(&cache_path).unwrap();
assert_eq!(r2.alias("repo", "acme/foo"), a1);
assert_eq!(r2.alias("ws", "my-workstream"), a2);
}
#[test]
fn missing_file_is_noop() {
let r = DeterministicRedactor::new(b"key");
let result = r.load_cache(std::path::Path::new("/nonexistent/path/cache.json"));
assert!(result.is_ok());
}
#[test]
fn corrupt_file_errors() {
let dir = tempfile::tempdir().unwrap();
let cache_path = dir.path().join("redaction.aliases.json");
std::fs::write(&cache_path, "this is not json!!!").unwrap();
let r = DeterministicRedactor::new(b"key");
let result = r.load_cache(&cache_path);
assert!(result.is_err());
}
#[test]
fn version_mismatch_errors() {
let dir = tempfile::tempdir().unwrap();
let cache_path = dir.path().join("redaction.aliases.json");
let bad = serde_json::json!({ "version": 99, "entries": {} });
std::fs::write(&cache_path, serde_json::to_string(&bad).unwrap()).unwrap();
let r = DeterministicRedactor::new(b"key");
let result = r.load_cache(&cache_path);
assert!(result.is_err());
let msg = format!("{}", result.unwrap_err());
assert!(msg.contains("unsupported alias cache version"));
}
#[test]
fn redaction_profile_as_str_returns_expected_values() {
assert_eq!(RedactionProfile::Internal.as_str(), "internal");
assert_eq!(RedactionProfile::Manager.as_str(), "manager");
assert_eq!(RedactionProfile::Public.as_str(), "public");
}
#[test]
fn cache_path_joins_out_dir_with_filename() {
let path = DeterministicRedactor::cache_path(Path::new("/some/out"));
assert!(
path.ends_with("redaction.aliases.json"),
"expected path to end with cache filename, got: {path:?}"
);
}
#[test]
fn internal_profile_preserves_workstreams() {
use shiplog_ids::WorkstreamId;
use shiplog_schema::workstream::WorkstreamStats;
let r = DeterministicRedactor::new(b"test-key");
let ws = Workstream {
id: WorkstreamId::from_parts(["ws", "keep"]),
title: "My Workstream Title".into(),
summary: Some("Detailed summary here".into()),
tags: vec!["tag-a".into(), "repo".into()],
stats: WorkstreamStats::zero(),
events: vec![],
receipts: vec![],
};
let ws_file = WorkstreamsFile {
workstreams: vec![ws],
version: 1,
generated_at: Utc::now(),
};
let out = r.redact_workstreams(&ws_file, "internal").unwrap();
let ws_out = &out.workstreams[0];
assert_eq!(ws_out.title, "My Workstream Title");
assert_eq!(ws_out.summary.as_deref(), Some("Detailed summary here"));
assert_eq!(ws_out.tags, vec!["tag-a".to_string(), "repo".to_string()]);
}
#[test]
fn cache_preserves_across_key_change() {
let dir = tempfile::tempdir().unwrap();
let cache_path = dir.path().join("redaction.aliases.json");
let r1 = DeterministicRedactor::new(b"key-A");
let alias_a = r1.alias("repo", "acme/foo");
r1.save_cache(&cache_path).unwrap();
let r2 = DeterministicRedactor::new(b"key-B");
r2.load_cache(&cache_path).unwrap();
let alias_b = r2.alias("repo", "acme/foo");
assert_eq!(
alias_a, alias_b,
"cached alias should be preserved, not regenerated with new key"
);
let fresh_b = r2.alias("repo", "acme/bar");
let r3 = DeterministicRedactor::new(b"key-A");
let fresh_a = r3.alias("repo", "acme/bar");
assert_ne!(
fresh_b, fresh_a,
"uncached alias should use current key, not old key"
);
}
proptest! {
#[test]
fn prop_sensitive_strings_redacted(
title in "[a-zA-Z0-9_-]{10,50}",
repo in r"[a-z0-9_-]+/[a-z0-9_-]+"
) {
let r = DeterministicRedactor::new(b"test-key");
let ev = EventEnvelope {
id: EventId::from_parts(["x","1"]),
kind: EventKind::PullRequest,
occurred_at: Utc::now(),
actor: Actor { login: "a".into(), id: None },
repo: RepoRef { full_name: repo.clone(), html_url: None, visibility: RepoVisibility::Private },
payload: EventPayload::PullRequest(PullRequestEvent {
number: 1,
title: title.clone(),
state: PullRequestState::Merged,
created_at: Utc::now(),
merged_at: Some(Utc::now()),
additions: Some(1),
deletions: Some(1),
changed_files: Some(1),
touched_paths_hint: vec![],
window: None,
}),
tags: vec![],
links: vec![],
source: SourceRef { system: SourceSystem::Github, url: None, opaque_id: None },
};
let out = r.redact_events(&[ev], "public").unwrap();
let json = serde_json::to_string(&out)?;
prop_assert!(!json.contains(&title), "Title '{}' leaked in output", title);
if !repo.is_empty() {
prop_assert!(!json.contains(&repo), "Repo '{}' leaked in output", repo);
}
match &out[0].payload {
EventPayload::PullRequest(pr) => {
prop_assert_eq!(&pr.title, "[redacted]");
}
_ => prop_assert!(false, "Expected PR payload"),
}
}
}
}