use std::sync::LazyLock;
use regex::Regex;
use serde_json::Value;
/// Placeholder stored in place of JSON values that sit under a sensitive key or
/// at/beyond the depth limit.
const REDACTED_SECRET: &str = "[REDACTED_SECRET]";
/// Placeholder that replaces each `-----BEGIN ... PRIVATE KEY-----` block found in text.
const REDACTED_PRIVATE_KEY: &str = "[REDACTED_PRIVATE_KEY]";
/// Placeholder that replaces each match of a PII pattern.
const REDACTED_PII: &str = "[REDACTED_PII]";
/// Nesting depth at which JSON sanitization stops descending and replaces the
/// value it reached with `REDACTED_SECRET`.
const MAX_JSON_SANITIZE_DEPTH: usize = 128;
/// Tally of the redactions performed while sanitizing one value.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SanitizationReport {
    /// Matches of the secret/token text patterns that were replaced.
    pub text_redactions: usize,
    /// JSON object entries whose value was replaced because the key is sensitive.
    pub key_redactions: usize,
    /// Private key blocks that were replaced.
    pub blocked_secret_hits: usize,
    /// JSON values replaced because they sit at or beyond the depth limit.
    pub depth_redactions: usize,
    /// Matches of the PII text patterns that were replaced.
    pub pii_redactions: usize,
}

impl SanitizationReport {
    /// Returns `true` when at least one counter is non-zero.
    pub fn changed(&self) -> bool {
        // The default report has every counter at zero, so differing from it
        // is the same as having at least one counter above zero.
        *self != Self::default()
    }

    /// Returns a report whose counters are the field-wise sums of `self` and `rhs`.
    pub fn merge(self, rhs: Self) -> Self {
        let mut total = self;
        total.text_redactions += rhs.text_redactions;
        total.key_redactions += rhs.key_redactions;
        total.blocked_secret_hits += rhs.blocked_secret_hits;
        total.depth_redactions += rhs.depth_redactions;
        total.pii_redactions += rhs.pii_redactions;
        total
    }
}
/// A sanitized value paired with the report of the redactions applied to it.
#[derive(Debug, Clone)]
pub struct Sanitized<T> {
/// The value after redaction.
pub value: T,
/// Counters describing the redactions that produced `value`.
pub report: SanitizationReport,
}
/// Regexes matching armored private key blocks, from a `-----BEGIN ... PRIVATE KEY-----`
/// marker through an `-----END ... PRIVATE KEY-----` marker. Compiled on first use.
///
/// All patterns are case-insensitive and let `.` span newlines (`(?is)`); the lazy `.*?`
/// makes each match end at the first END marker after its BEGIN marker.
///
/// # Panics
///
/// Panics on first access if a pattern fails to compile.
static BLOCK_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
    // (pattern, invariant message handed to `expect`)
    const SOURCES: &[(&str, &str)] = &[
        (
            r"(?is)-----BEGIN(?: [A-Z]+)? PRIVATE KEY-----.*?-----END(?: [A-Z]+)? PRIVATE KEY-----",
            "valid private key block",
        ),
        (
            r"(?is)-----BEGIN OPENSSH PRIVATE KEY-----.*?-----END OPENSSH PRIVATE KEY-----",
            "valid openssh private key block",
        ),
        (
            r"(?is)-----BEGIN PGP PRIVATE KEY BLOCK-----.*?-----END PGP PRIVATE KEY BLOCK-----",
            "valid pgp private key block",
        ),
    ];

    SOURCES
        .iter()
        .map(|&(pattern, invariant)| Regex::new(pattern).expect(invariant))
        .collect()
});
/// Secret/token regexes paired with the replacement text used for each match.
/// Compiled on first use; `sanitize_text` applies them in the order listed here.
///
/// Two replacement forms are used: one keeps the first capture group (the
/// `bearer ` / `api_key=` prefix) and redacts only the value, the other replaces
/// the entire match, including any key name that was part of it.
///
/// # Panics
///
/// Panics on first access if a pattern fails to compile.
static REDACTION_PATTERNS: LazyLock<Vec<(Regex, &'static str)>> = LazyLock::new(|| {
    // Keeps capture group 1 and redacts what follows it.
    const KEEP_PREFIX: &str = "${1}[REDACTED]";
    // Replaces the whole match.
    const WHOLE_MATCH: &str = "[REDACTED]";

    // (pattern, replacement, invariant message handed to `expect`)
    const SOURCES: &[(&str, &str, &str)] = &[
        (
            r"(?i)(bearer\s+)[A-Za-z0-9._~+/=-]{8,}",
            KEEP_PREFIX,
            "valid bearer redaction",
        ),
        (
            r#"(?i)(api[_-]?key\s*[=:\s]\s*["']?)[^\s"']+"#,
            KEEP_PREFIX,
            "valid api key redaction",
        ),
        (
            r#"(?i)\b(token|access[_-]?token|refresh[_-]?token|client[_-]?secret|password|secret)\b\s*[=:\s]\s*["']?[^\s"'&]+"#,
            WHOLE_MATCH,
            "valid token redaction",
        ),
        (
            r"\bsk-[A-Za-z0-9]{20,}\b",
            WHOLE_MATCH,
            "valid openai key redaction",
        ),
        (
            r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b",
            WHOLE_MATCH,
            "valid github token redaction",
        ),
        (
            r"\bAKIA[0-9A-Z]{16}\b",
            WHOLE_MATCH,
            "valid aws key redaction",
        ),
        (
            r"\bASIA[0-9A-Z]{16}\b",
            WHOLE_MATCH,
            "valid aws sts key redaction",
        ),
        (
            r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9._-]{8,}\.[A-Za-z0-9._-]{8,}\b",
            WHOLE_MATCH,
            "valid jwt redaction",
        ),
        (
            r#"(?i)\b(access_token|refresh_token|id_token|authorization_code|code_verifier|code_challenge)\b\s*[=:\s]\s*["']?[^\s"'&]+"#,
            WHOLE_MATCH,
            "valid oauth token redaction",
        ),
        (
            r"\bAIza[0-9A-Za-z\-_]{35}\b",
            WHOLE_MATCH,
            "valid google api key redaction",
        ),
        (
            r"\bsk-ant-[A-Za-z0-9\-_]{16,}\b",
            WHOLE_MATCH,
            "valid anthropic key redaction",
        ),
        (
            r"\bsk-(?:proj|org)-[A-Za-z0-9\-_]{12,}\b",
            WHOLE_MATCH,
            "valid openai scoped key redaction",
        ),
        (
            r"\b(?:sk|rk)_(?:live|test)_[A-Za-z0-9]{16,}\b",
            WHOLE_MATCH,
            "valid stripe key redaction",
        ),
        (
            r"\bxox(?:a|b|p|s|r)-[A-Za-z0-9-]{10,}\b",
            WHOLE_MATCH,
            "valid slack token redaction",
        ),
        (
            r"\bgithub_pat_[A-Za-z0-9_]{20,}\b",
            WHOLE_MATCH,
            "valid github pat redaction",
        ),
        (
            r"\bglpat-[A-Za-z0-9\-_]{16,}\b",
            WHOLE_MATCH,
            "valid gitlab pat redaction",
        ),
        (
            r"\bnpm_[A-Za-z0-9]{20,}\b",
            WHOLE_MATCH,
            "valid npm token redaction",
        ),
        (
            r"\bSG\.[A-Za-z0-9_\-]{16,}\.[A-Za-z0-9_\-]{16,}\b",
            WHOLE_MATCH,
            "valid sendgrid key redaction",
        ),
    ];

    SOURCES
        .iter()
        .map(|&(pattern, replacement, invariant)| {
            (Regex::new(pattern).expect(invariant), replacement)
        })
        .collect()
});
/// PII regexes (email address, `ddd-dd-dddd`, `ddd.ddd.ddd-dd`, and `+` followed by
/// 7 to 15 digits), each paired with the `REDACTED_PII` placeholder. Compiled on first use.
///
/// # Panics
///
/// Panics on first access if a pattern fails to compile.
static PII_PATTERNS: LazyLock<Vec<(Regex, &'static str)>> = LazyLock::new(|| {
    // (pattern, invariant message handed to `expect`)
    const SOURCES: &[(&str, &str)] = &[
        (
            r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
            "valid email pii",
        ),
        (r"\b\d{3}-\d{2}-\d{4}\b", "valid ssn pii"),
        (r"\b\d{3}\.\d{3}\.\d{3}-\d{2}\b", "valid cpf pii"),
        (r"\+\d{7,15}\b", "valid e164 pii"),
    ];

    SOURCES
        .iter()
        .map(|&(pattern, invariant)| (Regex::new(pattern).expect(invariant), REDACTED_PII))
        .collect()
});
/// Reports whether `value` matches any private key block pattern or any
/// secret/token redaction pattern.
pub fn has_likely_secret(value: &str) -> bool {
    for regex in BLOCK_PATTERNS.iter() {
        if regex.is_match(value) {
            return true;
        }
    }
    // Only consulted when no private key block was found.
    for (regex, _) in REDACTION_PATTERNS.iter() {
        if regex.is_match(value) {
            return true;
        }
    }
    false
}
/// Reports whether `value` matches any of the PII patterns.
pub fn has_likely_pii(value: &str) -> bool {
    PII_PATTERNS
        .iter()
        .map(|(regex, _)| regex)
        .any(|regex| regex.is_match(value))
}
pub fn sanitize_text(value: &str) -> Sanitized<String> {
let mut out = value.to_string();
let mut report = SanitizationReport::default();
for pattern in BLOCK_PATTERNS.iter() {
let hits = pattern.find_iter(&out).count();
if hits > 0 {
report.blocked_secret_hits += hits;
out = pattern.replace_all(&out, REDACTED_PRIVATE_KEY).into_owned();
}
}
for (pattern, replacement) in REDACTION_PATTERNS.iter() {
let hits = pattern.find_iter(&out).count();
if hits > 0 {
report.text_redactions += hits;
out = pattern.replace_all(&out, *replacement).into_owned();
}
}
for (pattern, replacement) in PII_PATTERNS.iter() {
let hits = pattern.find_iter(&out).count();
if hits > 0 {
report.pii_redactions += hits;
out = pattern.replace_all(&out, *replacement).into_owned();
}
}
Sanitized { value: out, report }
}
/// Returns a sanitized copy of `value` together with a report of what was redacted.
///
/// Values stored under a sensitive object key are replaced with a placeholder,
/// strings are passed through `sanitize_text`, and anything nested at or beyond
/// `MAX_JSON_SANITIZE_DEPTH` is replaced wholesale. The input is not modified.
pub fn sanitize_json(value: &Value) -> Sanitized<Value> {
sanitize_json_inner(value, 0)
}
/// Recursive worker for `sanitize_json`; `depth` is the nesting level of `value`,
/// with the root at 0.
///
/// Builds a new value instead of mutating the input and accumulates the
/// redaction counters of everything beneath `value`.
fn sanitize_json_inner(value: &Value, depth: usize) -> Sanitized<Value> {
    // The depth guard applies to every kind of value, scalars included: whatever
    // sits at the limit is replaced wholesale and counted once.
    if depth >= MAX_JSON_SANITIZE_DEPTH {
        let report = SanitizationReport {
            depth_redactions: 1,
            ..Default::default()
        };
        return Sanitized {
            value: Value::String(REDACTED_SECRET.to_string()),
            report,
        };
    }

    let child_depth = depth + 1;
    let mut report = SanitizationReport::default();

    let cleaned = match value {
        Value::Object(entries) => {
            let mut rebuilt = serde_json::Map::new();
            for (name, child) in entries {
                let replacement = if is_sensitive_key(name) {
                    // The value under a sensitive key is dropped without being inspected.
                    report.key_redactions += 1;
                    Value::String(REDACTED_SECRET.to_string())
                } else {
                    let Sanitized {
                        value: cleaned_child,
                        report: child_report,
                    } = sanitize_json_inner(child, child_depth);
                    report = report.merge(child_report);
                    cleaned_child
                };
                rebuilt.insert(name.clone(), replacement);
            }
            Value::Object(rebuilt)
        }
        Value::Array(items) => {
            let rebuilt: Vec<Value> = items
                .iter()
                .map(|item| {
                    let child = sanitize_json_inner(item, child_depth);
                    report = report.merge(child.report);
                    child.value
                })
                .collect();
            Value::Array(rebuilt)
        }
        Value::String(text) => {
            let Sanitized {
                value: cleaned_text,
                report: text_report,
            } = sanitize_text(text);
            report = text_report;
            Value::String(cleaned_text)
        }
        // Remaining variants are copied through unchanged.
        other => other.clone(),
    };

    Sanitized {
        value: cleaned,
        report,
    }
}
/// Decides whether a JSON object key names a value that must be redacted.
///
/// The key is normalized first: everything except ASCII letters and digits is
/// dropped and letters are lowercased, so `API-Key`, `api_key` and `apiKey` all
/// compare as `apikey`. A normalized key is sensitive when it
///
/// * equals `authorization`,
/// * ends with `token` or `key`, or
/// * contains `password` or `secret`.
fn is_sensitive_key(key: &str) -> bool {
    const EXACT: &str = "authorization";
    const SUFFIXES: [&str; 2] = ["token", "key"];
    const FRAGMENTS: [&str; 2] = ["password", "secret"];

    let mut normalized = String::with_capacity(key.len());
    for c in key.chars() {
        if c.is_ascii_alphanumeric() {
            normalized.push(c.to_ascii_lowercase());
        }
    }

    normalized == EXACT
        || SUFFIXES.iter().any(|&suffix| normalized.ends_with(suffix))
        || FRAGMENTS.iter().any(|&fragment| normalized.contains(fragment))
}
// Unit tests for this module are kept in `safety_tests.rs` and are compiled
// only for test builds.
#[cfg(test)]
#[path = "safety_tests.rs"]
mod tests;