use std::sync::OnceLock;
use regex::Regex;
use tracing::warn;
/// Byte-length threshold above which `truncate_string` cuts a string and appends a
/// `[TRUNCATED:<n>B]` marker.
const MAX_STRING_BYTES: usize = 4096;
/// One masking rule: a compiled regex plus the information needed to render its replacement.
struct MaskPattern {
/// Compiled expression; every match in a string is replaced by a mask token.
regex: Regex,
/// Tag written inside the mask token (e.g. `AWS_KEY`, `CUSTOM`).
label: &'static str,
/// Number of leading bytes of the match echoed inside the mask token; `0` echoes nothing.
prefix_len: usize,
}
/// Builds the built-in masking rules, in the order they are applied.
///
/// Each table row is `(regex source, label, prefix_len)`.
///
/// # Panics
/// Panics if one of the hard-coded expressions fails to compile.
fn builtin_patterns() -> Vec<MaskPattern> {
    const RULES: &[(&str, &str, usize)] = &[
        (r"AKIA[0-9A-Z]{16}", "AWS_KEY", 4),
        (r"github_pat_[a-zA-Z0-9_]{82}", "GITHUB_TOKEN", 11),
        (r"ghp_[a-zA-Z0-9]{36}", "GITHUB_TOKEN", 4),
        (r"sk_live_[0-9a-zA-Z]{24}", "STRIPE_KEY", 8),
        (r"sk_test_[0-9a-zA-Z]{24}", "STRIPE_KEY", 8),
        (r"sk-[a-zA-Z0-9]{48}", "OPENAI_KEY", 3),
        (r"xox[baprs]-[0-9a-zA-Z\-]{10,48}", "SLACK_TOKEN", 5),
        (r"Bearer\s+[a-zA-Z0-9\-._~+/]+=*", "BEARER", 0),
        (
            r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |OPENSSH )?PRIVATE KEY-----",
            "PRIVATE_KEY",
            0,
        ),
    ];

    RULES
        .iter()
        .map(|&(source, label, prefix_len)| MaskPattern {
            regex: Regex::new(source).unwrap(),
            label,
            prefix_len,
        })
        .collect()
}
/// A set of masking rules applied to string values.
pub struct PrivacyFilter {
/// Rules in application order: the built-in ones first, then the valid caller-supplied ones.
patterns: Vec<MaskPattern>,
}
impl PrivacyFilter {
pub fn new(extra_patterns: &[String]) -> Self {
let mut patterns = builtin_patterns();
for pat_str in extra_patterns {
match Regex::new(pat_str) {
Ok(regex) => {
patterns.push(MaskPattern {
regex,
label: "CUSTOM",
prefix_len: 0,
});
}
Err(e) => {
warn!(pattern = %pat_str, error = %e, "Skipping invalid privacy regex pattern");
}
}
}
Self { patterns }
}
}
/// Process-wide filter slot: written by `init_filter`, read by `get_filter`.
static FILTER: OnceLock<PrivacyFilter> = OnceLock::new();
/// Installs the global privacy filter, built from the built-in rules plus `extra_patterns`.
///
/// Goes through `OnceLock::get_or_init`, so only the first call builds and stores a filter.
/// Later calls leave the stored filter untouched and their `extra_patterns` are not used.
pub fn init_filter(extra_patterns: &[String]) {
FILTER.get_or_init(|| PrivacyFilter::new(extra_patterns));
}
/// Returns the global privacy filter.
///
/// # Panics
/// Panics if the global filter has not been installed yet.
pub fn get_filter() -> &'static PrivacyFilter {
    const NOT_INITIALIZED: &str =
        "privacy filter not initialized — call init_filter() at startup";

    let installed = FILTER.get();
    installed.expect(NOT_INITIALIZED)
}
fn mask_string(s: &str, patterns: &[MaskPattern]) -> String {
let mut result = s.to_string();
for pat in patterns {
result = pat
.regex
.replace_all(&result, |caps: ®ex::Captures| {
let matched = &caps[0];
if pat.prefix_len > 0 && matched.len() >= pat.prefix_len {
let prefix = &matched[..pat.prefix_len];
format!("[{}:{}***]", pat.label, prefix)
} else {
format!("[{}:***]", pat.label)
}
})
.into_owned();
}
result
}
/// Caps `s` at `MAX_STRING_BYTES` bytes, appending `[TRUNCATED:<n>B]` when it had to be cut.
///
/// `<n>` is the byte length of `s` before truncation. Strings at or under the limit are
/// returned unchanged. The cut is moved back to the nearest UTF-8 character boundary, so
/// a multi-byte character straddling the limit is dropped whole rather than split.
fn truncate_string(mut s: String) -> String {
    let original_len = s.len();
    if original_len <= MAX_STRING_BYTES {
        return s;
    }

    // `String::truncate` panics when the new length is not on a char boundary, so the
    // boundary has to be located *before* truncating. Index 0 is always a boundary,
    // which bounds this loop.
    let mut cut = MAX_STRING_BYTES;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    s.truncate(cut);

    format!("{}[TRUNCATED:{}B]", s, original_len)
}
pub fn filter_value(value: &mut serde_json::Value, filter: &PrivacyFilter) {
match value {
serde_json::Value::String(s) => {
let masked = mask_string(s, &filter.patterns);
let truncated = truncate_string(masked);
*s = truncated;
}
serde_json::Value::Object(map) => {
for v in map.values_mut() {
filter_value(v, filter);
}
}
serde_json::Value::Array(arr) => {
for v in arr.iter_mut() {
filter_value(v, filter);
}
}
_ => {}
}
}
/// Masks and truncates every string inside `value` using the global filter.
///
/// # Panics
/// Panics if the global filter has not been installed (see `get_filter`).
pub fn filter_event(value: &mut serde_json::Value) {
    filter_value(value, get_filter());
}
/// Masks and truncates every string inside `value` using the caller-supplied `filter`
/// instead of the global one; delegates directly to `filter_value`.
pub fn filter_event_with(value: &mut serde_json::Value, filter: &PrivacyFilter) {
filter_value(value, filter);
}
// Unit tests for the privacy filter. Every test builds its own `PrivacyFilter` and goes
// through `filter_event_with`, so none of them touches the global `FILTER`.
#[cfg(test)]
mod tests {
use serde_json::json;
use super::*;
// Filter containing only the built-in rules.
fn default_filter() -> PrivacyFilter {
PrivacyFilter::new(&[])
}
// --- Built-in rules: each secret is replaced by a token echoing its fixed prefix ---
#[test]
fn test_mask_string_aws_key_masked_with_prefix() {
let filter = default_filter();
let mut val = json!("AKIA1234567890ABCDEF");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[AWS_KEY:AKIA***]"));
}
#[test]
fn test_mask_string_github_token_ghp_masked_with_prefix() {
let filter = default_filter();
let token = format!("ghp_{}", "a".repeat(36));
let mut val = json!(token);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[GITHUB_TOKEN:ghp_***]"));
}
#[test]
fn test_mask_string_github_pat_masked_with_prefix() {
let filter = default_filter();
let token = format!("github_pat_{}", "a".repeat(82));
let mut val = json!(token);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[GITHUB_TOKEN:github_pat_***]"));
}
#[test]
fn test_mask_string_openai_key_masked_with_prefix() {
let filter = default_filter();
let key = format!("sk-{}", "a".repeat(48));
let mut val = json!(key);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[OPENAI_KEY:sk-***]"));
}
#[test]
fn test_mask_string_slack_token_masked_with_prefix() {
let filter = default_filter();
let mut val = json!("xoxb-12345-abcde");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[SLACK_TOKEN:xoxb-***]"));
}
#[test]
fn test_mask_string_stripe_live_key_masked_with_prefix() {
let filter = default_filter();
let key = format!("sk_live_{}", "a".repeat(24));
let mut val = json!(key);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[STRIPE_KEY:sk_live_***]"));
}
// --- Built-in rules with `prefix_len: 0`: nothing of the secret is echoed ---
#[test]
fn test_mask_string_bearer_token_masked_no_prefix() {
let filter = default_filter();
let mut val = json!("Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[BEARER:***]"));
}
// The PEM body spans several lines; the whole BEGIN..END block collapses into one token.
#[test]
fn test_mask_string_private_key_block_masked() {
let filter = default_filter();
let pem =
"-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA\n-----END RSA PRIVATE KEY-----";
let mut val = json!(pem);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[PRIVATE_KEY:***]"));
}
// --- Traversal of nested JSON containers ---
#[test]
fn test_filter_value_nested_json_masks_leaf_not_keys() {
let filter = default_filter();
let mut val = json!({"a": {"b": "AKIA1234567890ABCDEF"}});
filter_event_with(&mut val, &filter);
assert_eq!(val, json!({"a": {"b": "[AWS_KEY:AKIA***]"}}));
}
#[test]
fn test_filter_value_array_all_secrets_masked() {
let filter = default_filter();
let token = format!("ghp_{}", "b".repeat(36));
let mut val = json!(["AKIA1234567890ABCDEF", token]);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!(["[AWS_KEY:AKIA***]", "[GITHUB_TOKEN:ghp_***]"]));
}
// --- Values that must come out unchanged ---
// Second case: starts like a key prefix but does not satisfy any full pattern.
#[test]
fn test_mask_string_non_secrets_pass_through() {
let filter = default_filter();
let mut val = json!("hello world");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("hello world"));
let mut val2 = json!("sk_not_a_key");
filter_event_with(&mut val2, &filter);
assert_eq!(val2, json!("sk_not_a_key"));
}
#[test]
fn test_filter_value_non_string_types_pass_through() {
let filter = default_filter();
let mut val = json!({
"count": 42,
"flag": true,
"nothing": null,
"ratio": 1.5
});
let expected = val.clone();
filter_event_with(&mut val, &filter);
assert_eq!(val, expected);
}
// --- Truncation at the 4096-byte limit ---
// One byte over the limit: 4096 bytes of content are kept and the marker reports the
// original length.
#[test]
fn test_truncate_string_over_limit_adds_marker() {
let filter = default_filter();
let long_str: String = "x".repeat(4097);
let original_len = long_str.len();
let mut val = json!(long_str);
filter_event_with(&mut val, &filter);
let result = val.as_str().unwrap();
assert!(
result.ends_with(&format!("[TRUNCATED:{}B]", original_len)),
"Expected truncation marker, got: {}",
&result[result.len().saturating_sub(30)..]
);
let marker = format!("[TRUNCATED:{}B]", original_len);
let content = result.strip_suffix(&marker).unwrap();
assert_eq!(content.len(), 4096);
assert!(content.chars().all(|c| c == 'x'));
}
// Exactly at the limit: no truncation and no marker.
#[test]
fn test_truncate_string_at_limit_not_truncated() {
let filter = default_filter();
let exact_str: String = "y".repeat(4096);
let mut val = json!(exact_str.clone());
filter_event_with(&mut val, &filter);
assert_eq!(val, json!(exact_str));
}
// --- Several rules firing on the same string ---
#[test]
fn test_mask_string_multiple_secrets_all_masked() {
let filter = default_filter();
let aws = "AKIA1234567890ABCDEF";
let openai = format!("sk-{}", "c".repeat(48));
let combined = format!("key1={} key2={}", aws, openai);
let mut val = json!(combined);
filter_event_with(&mut val, &filter);
let result = val.as_str().unwrap();
assert!(
result.contains("[AWS_KEY:AKIA***]"),
"AWS key should be masked"
);
assert!(
result.contains("[OPENAI_KEY:sk-***]"),
"OpenAI key should be masked"
);
assert!(
!result.contains("AKIA1234567890ABCDEF"),
"Raw AWS key must not be present"
);
assert!(
!result.contains("sk-ccc"),
"Raw OpenAI key must not be present"
);
}
// Caller-supplied rules are labelled `CUSTOM` and echo no prefix; text around the match
// is preserved.
#[test]
fn test_filter_event_with_custom_pattern_applied() {
let extra = vec!["MY_SECRET_[0-9]{6}".to_string()];
let filter = PrivacyFilter::new(&extra);
let mut val = json!("token=MY_SECRET_123456");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("token=[CUSTOM:***]"));
}
#[test]
fn test_mask_string_empty_string_unchanged() {
let filter = default_filter();
let mut val = json!("");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!(""));
}
// The key starts at byte 4090 and so straddles the 4096-byte limit.
// NOTE(review): this assertion would also hold if truncation ran before masking, because
// the cut would split the key and the full raw key would no longer be present either.
// Asserting on the (partial) mask token would pin the ordering more tightly.
#[test]
fn test_mask_then_truncate_order_preserved() {
let filter = default_filter();
let padding: String = "a".repeat(4090);
let combined = format!("{}AKIA1234567890ABCDEF", padding);
assert!(combined.len() > 4096);
let mut val = json!(combined);
filter_event_with(&mut val, &filter);
let result = val.as_str().unwrap();
assert!(
!result.contains("AKIA1234567890ABCDEF"),
"Raw AWS key must not appear after filter"
);
}
}