#![allow(clippy::expect_used, clippy::unwrap_used)]
use chio_guards::{
sanitize_json, AllowlistConfig, DenylistConfig, EntropyConfig, OutputSanitizer,
OutputSanitizerConfig, OutputSanitizerConfigError, PostInvocationPipeline,
PostInvocationVerdict, RedactionStrategy, SanitizerHook, SensitiveCategory,
};
use std::collections::HashMap;
fn sanitizer_with_config(cfg: OutputSanitizerConfig) -> OutputSanitizer {
match OutputSanitizer::with_config(cfg) {
Ok(sanitizer) => sanitizer,
Err(error) => panic!("expected sanitizer config to compile: {error}"),
}
}
#[test]
fn detects_aws_access_key() {
let s = OutputSanitizer::new();
let input = "export AWS_KEY=AKIAIOSFODNN7EXAMPLE";
let r = s.sanitize_text(input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains("AKIAIOSFODNN7EXAMPLE"));
assert!(r
.findings
.iter()
.any(|f| f.id == "secret_aws_access_key_id"));
}
#[test]
fn detects_aws_secret_access_key() {
let s = OutputSanitizer::new();
let input = "aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
let r = s.sanitize_text(input);
assert!(r.was_redacted);
assert!(!r
.sanitized
.contains("wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"));
}
#[test]
fn detects_github_token() {
let s = OutputSanitizer::new();
let key = format!("ghp_{}", "a".repeat(36));
let input = format!("GITHUB_TOKEN={key}");
let r = s.sanitize_text(&input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains(&key));
assert!(r.findings.iter().any(|f| f.id == "secret_github_token"));
}
#[test]
fn detects_slack_bot_token() {
let s = OutputSanitizer::new();
let tok = format!("xoxb-{}", "A".repeat(48));
let input = format!("slack_token={tok}");
let r = s.sanitize_text(&input);
assert!(r.was_redacted, "expected redaction, got {:?}", r.sanitized);
assert!(!r.sanitized.contains(&tok));
}
#[test]
fn detects_slack_webhook() {
let s = OutputSanitizer::new();
let url = "https://hooks.slack.com/services/T00000000/B11111111/XXXXXXXXXXXXXXXXXXXXXXXX";
let input = format!("webhook: {url}");
let r = s.sanitize_text(&input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains(url));
}
#[test]
fn detects_gcp_service_account_json() {
let s = OutputSanitizer::new();
let input = r#"{"type": "service_account", "project_id": "my-project"}"#;
let r = s.sanitize_text(input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains(r#""type": "service_account""#));
}
#[test]
fn detects_gcp_service_account_object_through_pipeline() {
let s = OutputSanitizer::new();
let mut pipeline = PostInvocationPipeline::new();
pipeline.add(Box::new(SanitizerHook::from_sanitizer(s)));
let response = serde_json::json!({
"type": "service_account",
"project_id": "my-project",
"client_email": "svc@example.com"
});
let outcome = pipeline.evaluate_with_evidence("tool", &response);
match outcome.verdict {
PostInvocationVerdict::Redact(v) => {
assert!(v.is_null());
}
other => panic!("expected Redact, got {other:?}"),
}
assert_eq!(outcome.evidence.len(), 1);
assert_eq!(outcome.evidence[0].guard_name, "output-sanitizer");
assert!(outcome.evidence[0]
.details
.as_deref()
.unwrap_or("")
.contains("secret_gcp_service_account"));
}
#[test]
fn detects_password_assignment() {
let s = OutputSanitizer::new();
let input = "config: password=hunter2hunter2";
let r = s.sanitize_text(input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains("hunter2hunter2"));
}
#[test]
fn detects_pem_private_key() {
let s = OutputSanitizer::new();
let input = "\
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEA...redacted...
-----END RSA PRIVATE KEY-----";
let r = s.sanitize_text(input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains("MIIEowIBAAKCAQEA"));
assert!(r.findings.iter().any(|f| f.id == "secret_pem_private_key"));
}
#[test]
fn detects_jwt() {
let s = OutputSanitizer::new();
let jwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c";
let input = format!("bearer token: {jwt}");
let r = s.sanitize_text(&input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains(jwt));
assert!(r.findings.iter().any(|f| f.id == "secret_jwt"));
}
#[test]
fn detects_oauth_bearer_header() {
let s = OutputSanitizer::new();
let input = "Authorization: Bearer sk_live_abcdefghijklmnopqrstuvwxyz";
let r = s.sanitize_text(input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains("sk_live_abcdefghijklmnopqrstuvwxyz"));
}
#[test]
fn detects_dashed_ssn() {
let s = OutputSanitizer::new();
let r = s.sanitize_text("SSN 123-45-6789");
assert!(r.was_redacted);
assert!(!r.sanitized.contains("123-45-6789"));
assert!(r.findings.iter().any(|f| f.id == "pii_ssn"));
}
#[test]
fn rejects_invalid_ssn_area() {
let s = OutputSanitizer::new();
let r = s.sanitize_text("SSN 000-12-3456");
assert!(!r.findings.iter().any(|f| f.id == "pii_ssn"));
}
#[test]
fn detects_credit_card_with_luhn() {
let s = OutputSanitizer::new();
let r = s.sanitize_text("card: 4111 1111 1111 1111");
assert!(r.was_redacted);
assert!(!r.sanitized.contains("4111 1111 1111 1111"));
assert!(r.findings.iter().any(|f| f.id == "pii_credit_card"));
}
#[test]
fn rejects_random_16_digit_number() {
let s = OutputSanitizer::new();
let r = s.sanitize_text("order #1234567890123456");
assert!(!r.findings.iter().any(|f| f.id == "pii_credit_card"));
}
#[test]
fn rejects_repeated_digit_16() {
let s = OutputSanitizer::new();
let r = s.sanitize_text("card: 1111 1111 1111 1111");
assert!(!r.findings.iter().any(|f| f.id == "pii_credit_card"));
}
#[test]
fn detects_email() {
let s = OutputSanitizer::new();
let r = s.sanitize_text("Contact alice@example.com please");
assert!(r.was_redacted);
assert!(!r.sanitized.contains("alice@example.com"));
assert!(r.findings.iter().any(|f| f.id == "pii_email"));
}
#[test]
fn detects_internal_private_ip() {
let s = OutputSanitizer::new();
let r = s.sanitize_text("upstream: 10.0.1.42");
assert!(r.was_redacted);
assert!(!r.sanitized.contains("10.0.1.42"));
}
#[test]
fn detects_high_entropy_token() {
let s = OutputSanitizer::new();
let tok = "aB3dEfGh7JklmNopQrStUvWxYz012345abcdEfGhijKl";
let input = format!("opaque: {tok}");
let r = s.sanitize_text(&input);
assert!(r.was_redacted);
assert!(!r.sanitized.contains(tok));
assert!(r
.findings
.iter()
.any(|f| f.id == "secret_high_entropy_token"));
}
#[test]
fn low_entropy_token_not_flagged() {
let s = OutputSanitizer::new();
let input = "token: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let r = s.sanitize_text(input);
assert!(!r
.findings
.iter()
.any(|f| f.id == "secret_high_entropy_token"));
}
#[test]
fn entropy_threshold_configurable() {
let cfg = OutputSanitizerConfig {
entropy: EntropyConfig {
enabled: true,
threshold: 1.0,
min_token_len: 16,
},
..Default::default()
};
let s = sanitizer_with_config(cfg);
let r = s.sanitize_text("aaaabbbbccccddddeeeeffff");
assert!(r.was_redacted);
}
#[test]
fn allowlist_exact_suppresses_redaction() {
let cfg = OutputSanitizerConfig {
allowlist: AllowlistConfig {
exact: vec!["alice@example.com".to_string()],
patterns: vec![],
},
..Default::default()
};
let s = sanitizer_with_config(cfg);
let r = s.sanitize_text("alice@example.com");
assert!(!r.was_redacted);
assert_eq!(r.sanitized, "alice@example.com");
}
#[test]
fn allowlist_pattern_suppresses_redaction() {
let cfg = OutputSanitizerConfig {
allowlist: AllowlistConfig {
exact: vec![],
patterns: vec![r"@example\.com$".to_string()],
},
..Default::default()
};
let s = sanitizer_with_config(cfg);
let r = s.sanitize_text("bob@example.com");
assert!(!r.was_redacted);
assert_eq!(r.sanitized, "bob@example.com");
}
#[test]
fn denylist_exact_forces_redaction() {
let cfg = OutputSanitizerConfig {
denylist: DenylistConfig {
exact: vec!["CONFIDENTIAL_TAG".to_string()],
patterns: vec![],
},
..Default::default()
};
let s = sanitizer_with_config(cfg);
let r = s.sanitize_text("this message is CONFIDENTIAL_TAG bye");
assert!(r.was_redacted);
assert!(!r.sanitized.contains("CONFIDENTIAL_TAG"));
}
#[test]
fn denylist_pattern_forces_redaction() {
let cfg = OutputSanitizerConfig {
denylist: DenylistConfig {
exact: vec![],
patterns: vec![r"internal-[a-z]{6}".to_string()],
},
..Default::default()
};
let s = sanitizer_with_config(cfg);
let r = s.sanitize_text("code: internal-abcdef");
assert!(r.was_redacted);
assert!(!r.sanitized.contains("internal-abcdef"));
}
#[test]
fn overlap_only_one_redaction_per_region() {
let s = OutputSanitizer::new();
let input = "AKIAIOSFODNN7EXAMPLE";
let r = s.sanitize_text(input);
assert!(r.was_redacted);
assert_eq!(r.redactions.len(), 1);
assert_eq!(r.sanitized, "****");
}
#[test]
fn overlap_prefers_stronger_strategy() {
let cfg = OutputSanitizerConfig {
denylist: DenylistConfig {
exact: vec![],
patterns: vec![r"AKIA[0-9A-Z]{10,}".to_string()],
},
..Default::default()
};
let s = sanitizer_with_config(cfg);
let input = "AKIAIOSFODNN7EXAMPLE";
let r = s.sanitize_text(input);
assert!(r.was_redacted);
assert_eq!(r.redactions.len(), 1);
assert!(!r.sanitized.contains("AKIA"));
}
fn sanitizer_with_secret_strategy(strategy: RedactionStrategy) -> OutputSanitizer {
let mut strategies: HashMap<SensitiveCategory, RedactionStrategy> = HashMap::new();
strategies.insert(SensitiveCategory::Secret, strategy);
strategies.insert(SensitiveCategory::Pii, RedactionStrategy::Mask);
strategies.insert(SensitiveCategory::Internal, RedactionStrategy::TypeLabel);
let cfg = OutputSanitizerConfig {
redaction_strategies: strategies,
..Default::default()
};
sanitizer_with_config(cfg)
}
#[test]
fn invalid_denylist_regex_is_rejected() {
let cfg = OutputSanitizerConfig {
denylist: DenylistConfig {
exact: vec![],
patterns: vec!["(".to_string()],
},
..Default::default()
};
match OutputSanitizer::with_config(cfg) {
Ok(_) => panic!("expected invalid denylist regex to be rejected"),
Err(OutputSanitizerConfigError::InvalidPattern {
list_name, pattern, ..
}) => {
assert_eq!(list_name, "denylist");
assert_eq!(pattern, "(");
}
}
}
#[test]
fn strategy_mask_replaces_with_stars() {
let s = sanitizer_with_secret_strategy(RedactionStrategy::Mask);
let key = format!("ghp_{}", "a".repeat(36));
let r = s.sanitize_text(&key);
assert_eq!(r.sanitized, "****");
}
#[test]
fn strategy_fingerprint_replaces_with_hash() {
let s = sanitizer_with_secret_strategy(RedactionStrategy::Fingerprint);
let key = format!("ghp_{}", "a".repeat(36));
let r = s.sanitize_text(&key);
assert!(r.sanitized.starts_with("[FP:"));
assert!(r.sanitized.ends_with(']'));
let r2 = s.sanitize_text(&key);
assert_eq!(r.sanitized, r2.sanitized);
}
#[test]
fn strategy_drop_empties_string() {
let s = sanitizer_with_secret_strategy(RedactionStrategy::Drop);
let key = format!("ghp_{}", "a".repeat(36));
let r = s.sanitize_text(&key);
assert_eq!(r.sanitized, "");
}
#[test]
fn strategy_drop_collapses_field_to_null_in_json() {
let s = sanitizer_with_secret_strategy(RedactionStrategy::Drop);
let key = format!("ghp_{}", "a".repeat(36));
let response = serde_json::json!({"token": key, "other": "clean"});
let sv = s.sanitize_value(&response);
assert!(sv.was_redacted);
assert_eq!(sv.value["token"], serde_json::Value::Null);
assert_eq!(
sv.value["other"],
serde_json::Value::String("clean".to_string())
);
}
#[test]
fn strategy_tokenize_records_mapping() {
let s = sanitizer_with_secret_strategy(RedactionStrategy::Tokenize);
let key = format!("ghp_{}", "a".repeat(36));
let r = s.sanitize_text(&key);
assert!(r.sanitized.starts_with("[TOKEN:tok_"));
assert!(r.sanitized.ends_with(']'));
let vault = s.token_vault();
let id = r
.sanitized
.trim_start_matches("[TOKEN:")
.trim_end_matches(']');
let retrieved = vault.get(id).expect("token vault contains mapping");
assert_eq!(retrieved, key);
}
#[test]
fn strategy_type_label_uses_data_type() {
let s = sanitizer_with_secret_strategy(RedactionStrategy::TypeLabel);
let key = format!("ghp_{}", "a".repeat(36));
let r = s.sanitize_text(&key);
assert!(
r.sanitized.contains("[REDACTED:github_token]"),
"got {}",
r.sanitized
);
}
#[test]
fn strategy_partial_keeps_prefix_suffix() {
let s = sanitizer_with_secret_strategy(RedactionStrategy::Partial);
let key = format!("ghp_{}", "a".repeat(36));
let r = s.sanitize_text(&key);
assert!(r.sanitized.starts_with("gh"));
assert!(r.sanitized.ends_with("aa"));
assert!(r.sanitized.contains("***"));
}
#[test]
fn sanitize_value_preserves_json_structure() {
let s = OutputSanitizer::new();
let input = serde_json::json!({
"ok": true,
"nested": {
"email": "alice@example.com",
"items": [
{"token": format!("ghp_{}", "a".repeat(36))},
{"clean": "nothing"},
],
},
"numeric": 42,
});
let sv = s.sanitize_value(&input);
assert!(sv.was_redacted);
assert!(sv.value.as_object().unwrap().contains_key("ok"));
assert!(sv.value.as_object().unwrap().contains_key("nested"));
assert!(sv.value.as_object().unwrap().contains_key("numeric"));
assert_eq!(sv.value["numeric"], serde_json::json!(42));
assert_eq!(sv.value["ok"], serde_json::json!(true));
let items = sv.value["nested"]["items"].as_array().unwrap();
assert_eq!(items.len(), 2);
assert_eq!(items[1], serde_json::json!({"clean": "nothing"}));
let rendered = sv.value.to_string();
assert!(!rendered.contains("alice@example.com"));
assert!(!rendered.contains("ghp_aaaaaaaa"));
let serialized = serde_json::to_string(&sv.value).unwrap();
let reparsed: serde_json::Value = serde_json::from_str(&serialized).unwrap();
assert_eq!(reparsed, sv.value);
}
#[test]
fn sanitizer_hook_clean_response_no_evidence() {
let mut pipeline = PostInvocationPipeline::new();
pipeline.add(Box::new(SanitizerHook::new()));
let response = serde_json::json!({"status": "ok"});
let outcome = pipeline.evaluate_with_evidence("tool", &response);
assert!(matches!(outcome.verdict, PostInvocationVerdict::Allow));
assert!(outcome.evidence.is_empty());
}
#[test]
fn sanitizer_hook_dirty_response_emits_evidence() {
let mut pipeline = PostInvocationPipeline::new();
pipeline.add(Box::new(SanitizerHook::new()));
let response = serde_json::json!({
"summary": "Hello alice@example.com",
"aws_key": "AKIAIOSFODNN7EXAMPLE",
});
let outcome = pipeline.evaluate_with_evidence("tool", &response);
match &outcome.verdict {
PostInvocationVerdict::Redact(v) => {
let rendered = v.to_string();
assert!(!rendered.contains("alice@example.com"));
assert!(!rendered.contains("AKIAIOSFODNN7EXAMPLE"));
}
other => panic!("expected Redact, got {other:?}"),
}
assert_eq!(outcome.evidence.len(), 1);
let ev = &outcome.evidence[0];
assert_eq!(ev.guard_name, "output-sanitizer");
assert!(ev.verdict);
let details = ev.details.as_deref().unwrap_or("");
assert!(details.contains("pii_email"), "got: {details}");
assert!(
details.contains("secret_aws_access_key_id"),
"got: {details}"
);
assert!(!details.contains("alice@example.com"));
assert!(!details.contains("AKIAIOSFODNN7EXAMPLE"));
}
#[test]
fn sanitize_json_helper_aggregates_findings() {
let s = OutputSanitizer::new();
let input = serde_json::json!({
"email": "alice@example.com",
"nested": {"key": format!("ghp_{}", "a".repeat(36))},
});
let (value, result) = sanitize_json(&s, &input);
assert!(result.was_redacted);
assert!(result.findings.len() >= 2);
let rendered = value.to_string();
assert!(!rendered.contains("alice@example.com"));
assert!(!rendered.contains("ghp_aaaaaa"));
}
#[test]
fn acceptance_post_invocation_guard_redacts_everything() {
let mut pipeline = PostInvocationPipeline::new();
pipeline.add(Box::new(SanitizerHook::new()));
let response = serde_json::json!({
"ssn": "123-45-6789",
"card": "4111 1111 1111 1111",
"api_key": "AKIAIOSFODNN7EXAMPLE",
"opaque": "aB3dEfGh7JklmNopQrStUvWxYz012345abcdEfGh",
});
let outcome = pipeline.evaluate_with_evidence("tool", &response);
match &outcome.verdict {
PostInvocationVerdict::Redact(v) => {
let rendered = v.to_string();
assert!(!rendered.contains("123-45-6789"));
assert!(!rendered.contains("4111 1111 1111 1111"));
assert!(!rendered.contains("AKIAIOSFODNN7EXAMPLE"));
assert!(!rendered.contains("aB3dEfGh7JklmNopQrStUvWxYz012345abcdEfGh"));
}
other => panic!("expected Redact, got {other:?}"),
}
}