use std::borrow::Cow;
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
use super::{RedactionPolicy, REDACTED_PLACEHOLDER};
/// One record of a value that was replaced during JSON redaction.
///
/// A list of these is returned by `RedactionPolicy::redact_json_manifest`.
/// Because of `#[serde(default)]`, fields missing from serialized input are
/// filled from `Default` when deserializing.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RedactionEntry {
/// Location of the replaced value, as a JSONPath-style string rooted at `$`.
pub path: String,
/// Category of the redaction. The redaction code in this file emits
/// `"sensitive_field"` and `"secret_pattern_or_url"`.
pub class: String,
/// What was done to the value. The redaction code in this file only emits
/// `"replaced"`.
pub action: String,
/// The text that now sits at `path`; the redaction code in this file always
/// sets it to `Some(..)`.
pub replacement: Option<String>,
}
/// A string value that redaction would still modify, as reported by
/// `RedactionPolicy::find_unredacted_secret`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UnredactedSecret {
/// Location of the offending string, as a JSONPath-style string rooted at `$`.
pub path: String,
/// Leading portion of the offending string, produced by `secret_excerpt`
/// (at most 80 characters, with `...` appended when truncated).
/// NOTE(review): this is the raw, unredacted text and may therefore contain
/// the secret itself — confirm that consumers treat it as sensitive.
pub excerpt: String,
}
impl RedactionPolicy {
pub fn redact_json_manifest(&self, value: &mut JsonValue) -> Vec<RedactionEntry> {
let mut entries = Vec::new();
self.redact_json_manifest_at(value, "$", &mut entries);
entries
}
fn redact_json_manifest_at(
&self,
value: &mut JsonValue,
path: &str,
entries: &mut Vec<RedactionEntry>,
) {
match value {
JsonValue::Object(map) => {
let keys = map.keys().cloned().collect::<Vec<_>>();
for key in keys {
let child_path = json_path_child(path, &key);
if self.field_is_sensitive(&key) {
map.insert(key, JsonValue::String(REDACTED_PLACEHOLDER.to_string()));
entries.push(RedactionEntry {
path: child_path,
class: "sensitive_field".to_string(),
action: "replaced".to_string(),
replacement: Some(REDACTED_PLACEHOLDER.to_string()),
});
} else if let Some(child) = map.get_mut(&key) {
self.redact_json_manifest_at(child, &child_path, entries);
}
}
}
JsonValue::Array(items) => {
for (index, item) in items.iter_mut().enumerate() {
self.redact_json_manifest_at(item, &format!("{path}[{index}]"), entries);
}
}
JsonValue::String(text) => {
let redacted = self.redact_string(text);
if let Cow::Owned(replacement) = redacted {
let manifest_replacement = replacement.clone();
*text = replacement;
entries.push(RedactionEntry {
path: path.to_string(),
class: "secret_pattern_or_url".to_string(),
action: "replaced".to_string(),
replacement: Some(manifest_replacement),
});
}
}
_ => {}
}
}
pub fn find_unredacted_secret(&self, value: &JsonValue) -> Option<UnredactedSecret> {
self.find_unredacted_secret_at(value, "$")
}
fn find_unredacted_secret_at(&self, value: &JsonValue, path: &str) -> Option<UnredactedSecret> {
match value {
JsonValue::Object(map) => {
for (key, child) in map {
if let Some(found) =
self.find_unredacted_secret_at(child, &json_path_child(path, key))
{
return Some(found);
}
}
None
}
JsonValue::Array(items) => {
for (index, item) in items.iter().enumerate() {
if let Some(found) =
self.find_unredacted_secret_at(item, &format!("{path}[{index}]"))
{
return Some(found);
}
}
None
}
JsonValue::String(text) => {
if matches!(self.redact_string(text), Cow::Owned(_)) {
Some(UnredactedSecret {
path: path.to_string(),
excerpt: secret_excerpt(text),
})
} else {
None
}
}
_ => None,
}
}
}
/// Returns at most the first 80 characters of `text`, appending `...` when
/// anything was cut off.
///
/// Characters are Unicode scalar values, so the cut never lands inside a
/// multi-byte sequence. The text is returned raw; no redaction is applied here.
fn secret_excerpt(text: &str) -> String {
    const MAX_CHARS: usize = 80;
    // The start offset of the 81st character (if it exists) is both the cut
    // point and the proof that truncation is needed, so one bounded scan
    // replaces the previous take(80) + full-length count().
    match text.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}
/// Builds the path of the object member `key` beneath `parent`.
///
/// Non-empty keys made only of ASCII letters, digits and `_` use dot notation
/// (`parent.key`). Every other key — including the empty key — uses bracket
/// notation with the key rendered as a JSON string literal (`parent["my key"]`).
pub(crate) fn json_path_child(parent: &str, key: &str) -> String {
    // `all` is vacuously true for an empty key, which used to yield a
    // trailing-dot path such as `$.`; force the quoted form for it instead.
    let dot_safe = !key.is_empty()
        && key
            .chars()
            .all(|ch| ch.is_ascii_alphanumeric() || ch == '_');
    if dot_safe {
        format!("{parent}.{key}")
    } else {
        format!(
            "{parent}[{}]",
            serde_json::to_string(key).unwrap_or_default()
        )
    }
}
// Unit tests for manifest redaction and the unredacted-secret scan.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Fixture: `AKIA` followed by 16 uppercase letters.
// The secret-shaped fixtures below are assembled at runtime rather than written
// as single literals — presumably so secret scanners do not flag this source
// file; TODO confirm.
fn aws_key() -> String {
format!("AKIA{}", "ABCDEFGHIJKLMNOP")
}
// Fixture: `ghp_` followed by 36 `a` characters.
fn github_pat() -> String {
format!("ghp_{}", "a".repeat(36))
}
// Fixture: `sk_live_` followed by the lowercase alphabet.
fn stripe_key() -> String {
let head = ["sk", "live"].join("_");
format!("{head}_{}", "abcdefghijklmnopqrstuvwxyz")
}
// Fixture: a three-line OpenSSH private-key block with a short base64 body.
fn private_key_block() -> String {
"-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXktdjEAAAAA\n-----END OPENSSH PRIVATE KEY-----".to_string()
}
// Fixture: a transcript-shaped document mixing secrets with benign content.
// Secrets appear under an `api_key` member, inside a nested `env` object, and
// inside free text of a tool result; the benign content includes a 40-hex-digit
// commit id and a UUID that the tests expect to survive redaction.
fn dirty_transcript() -> JsonValue {
json!({
"_type": "transcript",
"messages": [
{ "role": "system", "content": "You are a coding agent. Commit is 903e58f1b0a4c2d3e4f5061728394a5b6c7d8e9f." },
{ "role": "user", "content": "deploy with AWS creds" },
{
"role": "assistant",
"content": [
{ "type": "text", "text": "Running the deploy." },
{
"type": "tool_use",
"id": "toolu_01",
"name": "run_command",
"input": {
"command": "aws deploy",
"api_key": aws_key(),
"env": { "AWS_ACCESS_KEY_ID": aws_key() }
}
}
]
},
{
"role": "tool",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_01",
"content": format!(
"auth: Bearer abcDEF123_-longenoughtoken\ngithub token {}\nstripe {}\n{}\nrequest_id 550e8400-e29b-41d4-a716-446655440000",
github_pat(), stripe_key(), private_key_block()
)
}
]
}
],
"summary": "deployed ok"
})
}
// Substrings that must not appear anywhere in the redacted transcript.
// The last entry is the base64 body of `private_key_block()`.
// NOTE(review): the bearer token embedded in the tool result is not listed
// here, so these tests do not assert that it is scrubbed.
fn secrets() -> Vec<String> {
vec![
aws_key(),
github_pat(),
stripe_key(),
"b3BlbnNzaC1rZXktdjEAAAAA".to_string(),
]
}
// Redaction removes every listed secret and the manifest records both a
// sensitive-field replacement and a pattern-based replacement.
#[test]
fn redact_json_manifest_scrubs_every_secret_and_records_paths() {
// Each test starts by resetting crate thread-local state; which state that is
// cannot be seen from this file.
crate::reset_thread_local_state();
let policy = RedactionPolicy::default();
let mut transcript = dirty_transcript();
let manifest = policy.redact_json_manifest(&mut transcript);
let rendered = serde_json::to_string(&transcript).unwrap();
for secret in secrets() {
assert!(
!rendered.contains(&secret),
"secret leaked into redacted transcript: {secret}\n{rendered}"
);
}
assert!(!manifest.is_empty(), "expected a non-empty manifest");
assert!(manifest
.iter()
.any(|entry| entry.path.ends_with(".api_key") && entry.class == "sensitive_field"));
assert!(manifest.iter().any(|entry| {
entry.class == "secret_pattern_or_url"
&& entry
.replacement
.as_deref()
.is_some_and(|value| value.contains("<redacted:"))
}));
}
// Benign text, the commit id and the UUID must all survive redaction.
#[test]
fn redact_json_manifest_preserves_non_secret_content() {
crate::reset_thread_local_state();
let policy = RedactionPolicy::default();
let mut transcript = dirty_transcript();
policy.redact_json_manifest(&mut transcript);
let rendered = serde_json::to_string(&transcript).unwrap();
assert!(rendered.contains("You are a coding agent"));
assert!(rendered.contains("deployed ok"));
assert!(rendered.contains("903e58f1b0a4c2d3e4f5061728394a5b6c7d8e9f"));
assert!(rendered.contains("550e8400-e29b-41d4-a716-446655440000"));
assert!(rendered.contains("Running the deploy."));
}
// Running redaction on already-redacted output must leave the document
// unchanged; only the serialized document is compared, not the manifest.
#[test]
fn redact_json_manifest_is_idempotent_on_output() {
crate::reset_thread_local_state();
let policy = RedactionPolicy::default();
let mut once = dirty_transcript();
policy.redact_json_manifest(&mut once);
let after_first = serde_json::to_string(&once).unwrap();
let mut twice = once.clone();
policy.redact_json_manifest(&mut twice);
let after_second = serde_json::to_string(&twice).unwrap();
assert_eq!(
after_first, after_second,
"second redaction pass must not further mangle already-redacted output"
);
}
// The scan reports a hit on the raw transcript and nothing after redaction.
#[test]
fn find_unredacted_secret_flags_raw_then_clears_after_redaction() {
crate::reset_thread_local_state();
let policy = RedactionPolicy::default();
let mut transcript = dirty_transcript();
let found = policy
.find_unredacted_secret(&transcript)
.expect("raw transcript still carries a secret");
assert!(found.path.starts_with("$."));
assert!(!found.excerpt.is_empty());
policy.redact_json_manifest(&mut transcript);
assert!(
policy.find_unredacted_secret(&transcript).is_none(),
"no secret should remain after redaction"
);
}
// Hex ids, UUIDs and ordinary prose mentioning "messages"/"tokens" must not be
// reported as secrets.
#[test]
fn find_unredacted_secret_ignores_benign_ids() {
crate::reset_thread_local_state();
let policy = RedactionPolicy::default();
let benign = json!({
"git_sha": "903e58f1b0a4c2d3e4f5061728394a5b6c7d8e9f",
"uuid": "550e8400-e29b-41d4-a716-446655440000",
"note": "kept 12 messages, added 3, then replied in text",
"max_tokens": "max_tokens=200",
});
assert!(policy.find_unredacted_secret(&benign).is_none());
}
}