use std::collections::BTreeMap;
use std::sync::OnceLock;
use regex::Regex;
use tracing::warn;
/// Byte-length limit for a filtered string value: strings longer than this are
/// cut down and suffixed with a `[TRUNCATED:<original byte length>B]` marker.
const MAX_STRING_BYTES: usize = 4096;
/// One masking rule: a regular expression plus how its matches are rendered.
struct MaskPattern {
// Expression whose matches are replaced by a `[LABEL:...]` marker.
regex: Regex,
// Name written into the replacement marker and used as the hit-counter key.
label: &'static str,
// Number of leading bytes of the match kept visible in the marker
// (`[LABEL:<prefix>***]`); 0 means nothing is kept (`[LABEL:***]`).
prefix_len: usize,
}
/// Builds the list of built-in masking patterns.
///
/// Each entry pairs a regular expression with the label used in the
/// replacement marker and the number of leading bytes of the match that stay
/// visible. The returned order is the order in which the patterns are applied.
///
/// # Panics
///
/// Panics if one of the built-in expressions fails to compile.
fn builtin_patterns() -> Vec<MaskPattern> {
    // (expression, label, prefix_len), listed in application order.
    const SPECS: [(&str, &str, usize); 9] = [
        (r"AKIA[0-9A-Z]{16}", "AWS_KEY", 4),
        (r"github_pat_[a-zA-Z0-9_]{82}", "GITHUB_TOKEN", 11),
        (r"ghp_[a-zA-Z0-9]{36}", "GITHUB_TOKEN", 4),
        (r"sk_live_[0-9a-zA-Z]{24}", "STRIPE_KEY", 8),
        (r"sk_test_[0-9a-zA-Z]{24}", "STRIPE_KEY", 8),
        (r"sk-[a-zA-Z0-9]{48}", "OPENAI_KEY", 3),
        (r"xox[baprs]-[0-9a-zA-Z\-]{10,48}", "SLACK_TOKEN", 5),
        (r"Bearer\s+[a-zA-Z0-9\-._~+/]+=*", "BEARER", 0),
        (
            r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |OPENSSH )?PRIVATE KEY-----",
            "PRIVATE_KEY",
            0,
        ),
    ];

    SPECS
        .iter()
        .map(|&(expr, label, prefix_len)| MaskPattern {
            regex: Regex::new(expr).unwrap(),
            label,
            prefix_len,
        })
        .collect()
}
/// A collection of masking patterns applied to string values.
pub struct PrivacyFilter {
    patterns: Vec<MaskPattern>,
}

impl PrivacyFilter {
    /// Creates a filter from the built-in patterns followed by `extra_patterns`.
    ///
    /// Every entry of `extra_patterns` is compiled as a regular expression and
    /// added with the label `CUSTOM` and no visible prefix. Entries that fail
    /// to compile are logged with a warning and left out; they do not abort
    /// construction.
    pub fn new(extra_patterns: &[String]) -> Self {
        // Turns one user-supplied expression into a pattern, or logs and drops it.
        let compile_custom = |pat_str: &String| match Regex::new(pat_str) {
            Ok(regex) => Some(MaskPattern {
                regex,
                label: "CUSTOM",
                prefix_len: 0,
            }),
            Err(e) => {
                warn!(pattern = %pat_str, error = %e, "Skipping invalid privacy regex pattern");
                None
            }
        };

        let mut patterns = builtin_patterns();
        patterns.extend(extra_patterns.iter().filter_map(compile_custom));
        Self { patterns }
    }
}
/// Process-wide filter instance; empty until it is initialized once.
static FILTER: OnceLock<PrivacyFilter> = OnceLock::new();
/// Initializes the global filter from the built-in patterns plus `extra_patterns`.
///
/// Only the first call builds a filter: `OnceLock::get_or_init` runs its
/// closure only while the cell is empty, so later calls leave the existing
/// filter in place and their `extra_patterns` are not used.
pub fn init_filter(extra_patterns: &[String]) {
FILTER.get_or_init(|| PrivacyFilter::new(extra_patterns));
}
/// Returns the global filter.
///
/// # Panics
///
/// Panics if the global filter has not been initialized yet, i.e.
/// `init_filter()` was not called beforehand.
pub fn get_filter() -> &'static PrivacyFilter {
FILTER
.get()
.expect("privacy filter not initialized — call init_filter() at startup")
}
/// Tally of how many matches were masked, keyed by pattern label.
#[derive(Debug, Default, Clone)]
pub struct HitCounter {
    counts: BTreeMap<&'static str, u64>,
}

impl HitCounter {
    /// Creates a counter with no recorded hits.
    pub fn new() -> Self {
        Self {
            counts: BTreeMap::new(),
        }
    }

    /// Adds `n` hits to `label`.
    ///
    /// A zero `n` is ignored, so labels never appear in the map with a count of 0.
    pub fn add(&mut self, label: &'static str, n: u64) {
        if n > 0 {
            *self.counts.entry(label).or_default() += n;
        }
    }

    /// Returns the sum of the hits recorded for all labels.
    pub fn total(&self) -> u64 {
        self.counts.values().copied().sum()
    }

    /// Iterates over `(label, count)` pairs in ascending label order.
    pub fn iter(&self) -> impl Iterator<Item = (&'static str, u64)> + '_ {
        self.counts.iter().map(|(&label, &count)| (label, count))
    }

    /// Borrows the underlying label-to-count map.
    pub fn as_map(&self) -> &BTreeMap<&'static str, u64> {
        &self.counts
    }
}
fn mask_string_with_hits(s: &str, patterns: &[MaskPattern], hits: &mut HitCounter) -> String {
let mut result = s.to_string();
for pat in patterns {
result = pat
.regex
.replace_all(&result, |caps: ®ex::Captures| {
hits.add(pat.label, 1);
let matched = &caps[0];
if pat.prefix_len > 0 && matched.len() >= pat.prefix_len {
let prefix = &matched[..pat.prefix_len];
format!("[{}:{}***]", pat.label, prefix)
} else {
format!("[{}:***]", pat.label)
}
})
.into_owned();
}
result
}
/// Limits `s` to at most `MAX_STRING_BYTES` bytes of content.
///
/// Strings within the limit are returned unchanged. Longer strings are cut at
/// the last character boundary at or below the limit and suffixed with
/// `[TRUNCATED:<original byte length>B]`.
fn truncate_string(s: String) -> String {
    if s.len() <= MAX_STRING_BYTES {
        return s;
    }

    // Cutting a `String` in the middle of a multi-byte character panics, so
    // step back to a boundary first. Index 0 is always a boundary, which
    // guarantees the loop terminates.
    let mut cut = MAX_STRING_BYTES;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}[TRUNCATED:{}B]", &s[..cut], s.len())
}
/// Masks and truncates every string inside `value` in place using `filter`.
///
/// Hit counts are collected into a temporary counter and discarded; use
/// `filter_value_with_hits` when they are needed.
pub fn filter_value(value: &mut serde_json::Value, filter: &PrivacyFilter) {
let mut hits = HitCounter::new();
filter_value_with_hits(value, filter, &mut hits);
}
pub fn filter_value_with_hits(
value: &mut serde_json::Value,
filter: &PrivacyFilter,
hits: &mut HitCounter,
) {
match value {
serde_json::Value::String(s) => {
let masked = mask_string_with_hits(s, &filter.patterns, hits);
let truncated = truncate_string(masked);
*s = truncated;
}
serde_json::Value::Object(map) => {
for v in map.values_mut() {
filter_value_with_hits(v, filter, hits);
}
}
serde_json::Value::Array(arr) => {
for v in arr.iter_mut() {
filter_value_with_hits(v, filter, hits);
}
}
_ => {}
}
}
/// Filters `value` in place using the global filter.
///
/// # Panics
///
/// Panics if the global filter has not been initialized (see `get_filter`).
pub fn filter_event(value: &mut serde_json::Value) {
let filter = get_filter();
filter_value(value, filter);
}
/// Filters `value` in place using the given `filter` instead of the global one.
/// Hit counts are not reported.
pub fn filter_event_with(value: &mut serde_json::Value, filter: &PrivacyFilter) {
filter_value(value, filter);
}
/// Filters `value` in place using the given `filter`, adding one hit per
/// masked match to `hits`. Counts already present in `hits` are kept.
pub fn filter_event_with_hits(
value: &mut serde_json::Value,
filter: &PrivacyFilter,
hits: &mut HitCounter,
) {
filter_value_with_hits(value, filter, hits);
}
// Unit tests for masking, truncation, JSON traversal and hit counting.
// All tests build their own `PrivacyFilter`, so none of them depends on the
// global filter being initialized.
#[cfg(test)]
mod tests {
use serde_json::json;
use super::*;
// Filter with only the built-in patterns.
fn default_filter() -> PrivacyFilter {
PrivacyFilter::new(&[])
}
// --- Built-in patterns: each secret is replaced by its labelled marker ---
#[test]
fn test_mask_string_aws_key_masked_with_prefix() {
let filter = default_filter();
let mut val = json!("AKIA1234567890ABCDEF");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[AWS_KEY:AKIA***]"));
}
#[test]
fn test_mask_string_github_token_ghp_masked_with_prefix() {
let filter = default_filter();
let token = format!("ghp_{}", "a".repeat(36));
let mut val = json!(token);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[GITHUB_TOKEN:ghp_***]"));
}
#[test]
fn test_mask_string_github_pat_masked_with_prefix() {
let filter = default_filter();
let token = format!("github_pat_{}", "a".repeat(82));
let mut val = json!(token);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[GITHUB_TOKEN:github_pat_***]"));
}
#[test]
fn test_mask_string_openai_key_masked_with_prefix() {
let filter = default_filter();
let key = format!("sk-{}", "a".repeat(48));
let mut val = json!(key);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[OPENAI_KEY:sk-***]"));
}
#[test]
fn test_mask_string_slack_token_masked_with_prefix() {
let filter = default_filter();
let mut val = json!("xoxb-12345-abcde");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[SLACK_TOKEN:xoxb-***]"));
}
#[test]
fn test_mask_string_stripe_live_key_masked_with_prefix() {
let filter = default_filter();
let key = format!("sk_live_{}", "a".repeat(24));
let mut val = json!(key);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[STRIPE_KEY:sk_live_***]"));
}
// Patterns with prefix_len 0 render as `[LABEL:***]` with no visible prefix.
#[test]
fn test_mask_string_bearer_token_masked_no_prefix() {
let filter = default_filter();
let mut val = json!("Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[BEARER:***]"));
}
#[test]
fn test_mask_string_private_key_block_masked() {
let filter = default_filter();
let pem =
"-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA\n-----END RSA PRIVATE KEY-----";
let mut val = json!(pem);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("[PRIVATE_KEY:***]"));
}
// --- JSON traversal: nested objects and arrays are walked, keys are kept ---
#[test]
fn test_filter_value_nested_json_masks_leaf_not_keys() {
let filter = default_filter();
let mut val = json!({"a": {"b": "AKIA1234567890ABCDEF"}});
filter_event_with(&mut val, &filter);
assert_eq!(val, json!({"a": {"b": "[AWS_KEY:AKIA***]"}}));
}
#[test]
fn test_filter_value_array_all_secrets_masked() {
let filter = default_filter();
let token = format!("ghp_{}", "b".repeat(36));
let mut val = json!(["AKIA1234567890ABCDEF", token]);
filter_event_with(&mut val, &filter);
assert_eq!(val, json!(["[AWS_KEY:AKIA***]", "[GITHUB_TOKEN:ghp_***]"]));
}
// Strings that merely resemble a key prefix must be left alone.
#[test]
fn test_mask_string_non_secrets_pass_through() {
let filter = default_filter();
let mut val = json!("hello world");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("hello world"));
let mut val2 = json!("sk_not_a_key");
filter_event_with(&mut val2, &filter);
assert_eq!(val2, json!("sk_not_a_key"));
}
#[test]
fn test_filter_value_non_string_types_pass_through() {
let filter = default_filter();
let mut val = json!({
"count": 42,
"flag": true,
"nothing": null,
"ratio": 1.5
});
let expected = val.clone();
filter_event_with(&mut val, &filter);
assert_eq!(val, expected);
}
// --- Truncation: content is capped at 4096 bytes, marker reports original size ---
#[test]
fn test_truncate_string_over_limit_adds_marker() {
let filter = default_filter();
let long_str: String = "x".repeat(4097);
let original_len = long_str.len();
let mut val = json!(long_str);
filter_event_with(&mut val, &filter);
let result = val.as_str().unwrap();
assert!(
result.ends_with(&format!("[TRUNCATED:{}B]", original_len)),
"Expected truncation marker, got: {}",
&result[result.len().saturating_sub(30)..]
);
let marker = format!("[TRUNCATED:{}B]", original_len);
let content = result.strip_suffix(&marker).unwrap();
assert_eq!(content.len(), 4096);
assert!(content.chars().all(|c| c == 'x'));
}
// A string of exactly 4096 bytes is at the limit, not over it.
#[test]
fn test_truncate_string_at_limit_not_truncated() {
let filter = default_filter();
let exact_str: String = "y".repeat(4096);
let mut val = json!(exact_str.clone());
filter_event_with(&mut val, &filter);
assert_eq!(val, json!(exact_str));
}
#[test]
fn test_mask_string_multiple_secrets_all_masked() {
let filter = default_filter();
let aws = "AKIA1234567890ABCDEF";
let openai = format!("sk-{}", "c".repeat(48));
let combined = format!("key1={} key2={}", aws, openai);
let mut val = json!(combined);
filter_event_with(&mut val, &filter);
let result = val.as_str().unwrap();
assert!(
result.contains("[AWS_KEY:AKIA***]"),
"AWS key should be masked"
);
assert!(
result.contains("[OPENAI_KEY:sk-***]"),
"OpenAI key should be masked"
);
assert!(
!result.contains("AKIA1234567890ABCDEF"),
"Raw AWS key must not be present"
);
assert!(
!result.contains("sk-ccc"),
"Raw OpenAI key must not be present"
);
}
// User-supplied patterns are labelled CUSTOM and keep no prefix.
#[test]
fn test_filter_event_with_custom_pattern_applied() {
let extra = vec!["MY_SECRET_[0-9]{6}".to_string()];
let filter = PrivacyFilter::new(&extra);
let mut val = json!("token=MY_SECRET_123456");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!("token=[CUSTOM:***]"));
}
#[test]
fn test_mask_string_empty_string_unchanged() {
let filter = default_filter();
let mut val = json!("");
filter_event_with(&mut val, &filter);
assert_eq!(val, json!(""));
}
// --- Hit counting: one hit per replaced match, keyed by pattern label ---
#[test]
fn test_hit_counter_records_one_hit_per_match() {
let filter = default_filter();
let mut hits = HitCounter::new();
let mut val = json!("AKIA1234567890ABCDEF");
filter_event_with_hits(&mut val, &filter, &mut hits);
assert_eq!(hits.total(), 1);
assert_eq!(hits.as_map().get("AWS_KEY"), Some(&1));
}
#[test]
fn test_hit_counter_aggregates_multiple_patterns_in_one_string() {
let filter = default_filter();
let mut hits = HitCounter::new();
let combined = format!("AKIA1234567890ABCDEF then sk-{}", "a".repeat(48));
let mut val = json!(combined);
filter_event_with_hits(&mut val, &filter, &mut hits);
assert_eq!(hits.as_map().get("AWS_KEY"), Some(&1));
assert_eq!(hits.as_map().get("OPENAI_KEY"), Some(&1));
assert_eq!(hits.total(), 2);
}
#[test]
fn test_hit_counter_aggregates_across_nested_json() {
let filter = default_filter();
let mut hits = HitCounter::new();
let mut val = json!({
"a": "AKIA1111111111111111",
"nested": ["AKIA2222222222222222", "Bearer abc.def-ghi"],
});
filter_event_with_hits(&mut val, &filter, &mut hits);
assert_eq!(hits.as_map().get("AWS_KEY"), Some(&2));
assert_eq!(hits.as_map().get("BEARER"), Some(&1));
assert_eq!(hits.total(), 3);
}
#[test]
fn test_hit_counter_zero_when_no_matches() {
let filter = default_filter();
let mut hits = HitCounter::new();
let mut val = json!({"x": "no secrets here"});
filter_event_with_hits(&mut val, &filter, &mut hits);
assert_eq!(hits.total(), 0);
assert!(hits.as_map().is_empty());
}
// Reusing one counter across calls accumulates rather than resets.
#[test]
fn test_hit_counter_preserves_existing_counts_when_called_twice() {
let filter = default_filter();
let mut hits = HitCounter::new();
let mut a = json!("AKIA1234567890ABCDEF");
filter_event_with_hits(&mut a, &filter, &mut hits);
let mut b = json!("AKIA9999999999999999");
filter_event_with_hits(&mut b, &filter, &mut hits);
assert_eq!(hits.as_map().get("AWS_KEY"), Some(&2));
}
// The key starts at byte 4090 and so straddles the 4096-byte limit.
// NOTE(review): the assertion below only checks that the complete raw key is
// absent. That would also hold if truncation ran before masking (the cut at
// 4096 bytes already splits the key), so this test does not by itself pin the
// mask-then-truncate order; asserting on the marker text would.
#[test]
fn test_mask_then_truncate_order_preserved() {
let filter = default_filter();
let padding: String = "a".repeat(4090);
let combined = format!("{}AKIA1234567890ABCDEF", padding);
assert!(combined.len() > 4096);
let mut val = json!(combined);
filter_event_with(&mut val, &filter);
let result = val.as_str().unwrap();
assert!(
!result.contains("AKIA1234567890ABCDEF"),
"Raw AWS key must not appear after filter"
);
}
}