use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::sync::LazyLock;
use regex::Regex;
/// Identifier string `HARN-OAU-001`.
// NOTE(review): presumably the diagnostic code attached to token-redaction reports;
// it is not referenced in this part of the file — confirm against callers.
pub const TOKEN_REDACTION_DIAGNOSTIC: &str = "HARN-OAU-001";
/// Topic string `audit.token_redaction`.
// NOTE(review): presumably the topic under which redaction audit events are published;
// it is not referenced in this part of the file — confirm against callers.
pub const TOKEN_REDACTION_AUDIT_TOPIC: &str = "audit.token_redaction";
// Inputs longer than this many bytes (256 KiB) are returned by `scan_secret_patterns`
// without being scanned.
const MAX_SCAN_INPUT_BYTES: usize = 256 * 1024;
/// A compiled secret-detection regex paired with the name used to label its matches.
#[derive(Clone)]
pub struct NamedPattern {
/// Label for this pattern; it appears in named placeholders and in audit events.
pub name: &'static str,
/// Compiled regex whose matches are replaced by the scanner.
pub regex: Regex,
}
/// Debug output shows the pattern name and the regex *source text* rather than the
/// compiled regex's own debug representation.
impl std::fmt::Debug for NamedPattern {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let source = self.regex.as_str();
        let mut rendering = f.debug_struct("NamedPattern");
        rendering.field("name", &self.name);
        rendering.field("regex", &source);
        rendering.finish()
    }
}
/// Built-in secret detectors, compiled lazily on first access.
///
/// The list order is the order in which `scan_secret_patterns` applies them; each
/// pattern runs against the output of the previous one.
///
/// # Panics
/// First access panics if any built-in regex source fails to compile.
pub static DEFAULT_PATTERNS: LazyLock<Vec<NamedPattern>> = LazyLock::new(|| {
    // Compiles one built-in detector; `why` is the panic message used if the literal
    // regex source is rejected.
    fn compile(name: &'static str, source: &str, why: &str) -> NamedPattern {
        NamedPattern {
            name,
            regex: Regex::new(source).expect(why),
        }
    }

    vec![
        compile(
            "jwt",
            r"\beyJ[A-Za-z0-9_-]{4,}\.[A-Za-z0-9_-]{4,}\.[A-Za-z0-9_-]{4,}\b",
            "jwt regex compiles",
        ),
        compile(
            "github_token",
            r"\bgh[pousr]_[A-Za-z0-9]{36,255}\b",
            "github_token regex compiles",
        ),
        compile(
            "github_pat_fine",
            r"\bgithub_pat_[A-Za-z0-9_]{20,255}\b",
            "github_pat_fine regex compiles",
        ),
        compile(
            "slack_token",
            r"\bxox[abprs]-[A-Za-z0-9-]{10,255}\b",
            "slack_token regex compiles",
        ),
        compile(
            "aws_access_key",
            r"\b(?:AKIA|ASIA|AGPA|AIDA|ANPA|AROA|AIPA)[A-Z0-9]{16}\b",
            "aws_access_key regex compiles",
        ),
        compile(
            "gitlab_token",
            r"\bglpat-[A-Za-z0-9_-]{20,255}\b",
            "gitlab_token regex compiles",
        ),
        compile(
            "npm_token",
            r"\bnpm_[A-Za-z0-9]{36}\b",
            "npm_token regex compiles",
        ),
        compile(
            "openai_key",
            r"\bsk-[A-Za-z0-9_-]{20,255}\b",
            "openai_key regex compiles",
        ),
        compile(
            "stripe_key",
            r"\b(?:rk|sk)_(?:live|test)_[0-9A-Za-z]{16,255}\b",
            "stripe_key regex compiles",
        ),
        compile(
            "bearer_token",
            r"(?i)\bBearer\s+[A-Za-z0-9._\-+/=]{12,}",
            "bearer_token regex compiles",
        ),
    ]
});
// Per-thread redaction state: each thread has its own custom patterns, sink and ring.
thread_local! {
// Patterns added through `register_custom_pattern`; the scanner applies them after
// the built-in defaults.
static CUSTOM_PATTERNS: RefCell<Vec<NamedPattern>> = const { RefCell::new(Vec::new()) };
// Optional callback that `emit_audit` invokes once per emitted event.
static AUDIT_SINK: RefCell<Option<AuditSink>> = const { RefCell::new(None) };
// In-memory log of emitted events; `emit_audit` keeps at most the newest 1024 entries.
static AUDIT_RING: RefCell<Vec<RedactionEvent>> = const { RefCell::new(Vec::new()) };
}
/// Summary of what one pattern redacted during a single scan.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RedactionEvent {
/// Name of the pattern that matched.
pub pattern_name: String,
/// Number of matches the pattern found in the scanned text.
pub match_count: usize,
/// Sum of the byte lengths of all matched spans.
pub bytes_redacted: usize,
}
/// Callback that receives each emitted [`RedactionEvent`]. It is reference-counted with
/// `Rc`, so a sink is tied to the thread that installs it.
pub type AuditSink = std::rc::Rc<dyn Fn(&RedactionEvent)>;
/// Compiles `regex_source` and appends it, labelled `name`, to the calling thread's
/// custom pattern list.
///
/// Existing entries are not checked, so registering the same name twice yields two
/// entries.
///
/// # Errors
/// Returns `Err("invalid regex: …")` when `regex_source` does not compile; in that case
/// nothing is registered.
///
/// # Memory
/// `NamedPattern::name` is a `&'static str`, so the name is leaked on every successful
/// call. `clear_custom_patterns` empties the list but does not reclaim those names.
pub fn register_custom_pattern(name: impl Into<String>, regex_source: &str) -> Result<(), String> {
    let compiled = match Regex::new(regex_source) {
        Ok(compiled) => compiled,
        Err(error) => return Err(format!("invalid regex: {error}")),
    };
    // Leak only after the regex is known to be valid, so a rejected pattern leaks nothing.
    let owned_name: String = name.into();
    let leaked_name: &'static str = Box::leak(owned_name.into_boxed_str());
    let entry = NamedPattern {
        name: leaked_name,
        regex: compiled,
    };
    CUSTOM_PATTERNS.with(|registry| registry.borrow_mut().push(entry));
    Ok(())
}
/// Removes every custom pattern registered on the calling thread.
pub fn clear_custom_patterns() {
    CUSTOM_PATTERNS.with_borrow_mut(|registry| registry.clear());
}
pub fn default_pattern_names() -> Vec<&'static str> {
DEFAULT_PATTERNS.iter().map(|p| p.name).collect()
}
/// Returns the names of the calling thread's custom patterns, in registration order.
pub fn custom_pattern_names() -> Vec<String> {
    CUSTOM_PATTERNS.with_borrow(|registry| {
        registry
            .iter()
            .map(|pattern| String::from(pattern.name))
            .collect()
    })
}
/// Installs `sink` as the calling thread's audit callback (`None` removes it) and
/// returns whatever was installed before.
pub fn install_audit_sink(sink: Option<AuditSink>) -> Option<AuditSink> {
    AUDIT_SINK.with(|slot| slot.replace(sink))
}
/// Records `events` in the calling thread's audit ring and forwards each one to the
/// installed sink, if any. Does nothing when `events` is empty.
///
/// The ring keeps only the newest `RING_CAPACITY` events; older entries are evicted
/// from the front.
fn emit_audit(events: &[RedactionEvent]) {
    /// Maximum number of events retained in the per-thread ring.
    const RING_CAPACITY: usize = 1024;

    if events.is_empty() {
        return;
    }
    AUDIT_RING.with(|ring| {
        let mut ring = ring.borrow_mut();
        // Only the newest RING_CAPACITY events of this batch can survive in the ring.
        let kept = &events[events.len().saturating_sub(RING_CAPACITY)..];
        // Evict the oldest entries in one pass instead of a front removal per event,
        // which would shift the whole ring every time.
        let overflow = (ring.len() + kept.len()).saturating_sub(RING_CAPACITY);
        let evict = overflow.min(ring.len());
        ring.drain(..evict);
        ring.extend_from_slice(kept);
    });
    // Clone the sink handle out of the cell so no `RefCell` borrow is held while the
    // callback runs; the sink may then replace itself or drain the ring without panicking.
    let sink = AUDIT_SINK.with(|cell| cell.borrow().clone());
    if let Some(sink) = sink {
        for event in events {
            sink(event);
        }
    }
}
/// Takes every event out of the calling thread's audit ring, leaving it empty, and
/// returns them oldest first.
pub fn drain_audit_ring() -> Vec<RedactionEvent> {
    AUDIT_RING.with(|ring| ring.take())
}
/// Discards every event held in the calling thread's audit ring.
pub fn clear_audit_ring() {
    AUDIT_RING.with_borrow_mut(|ring| ring.clear());
}
/// Builds the named placeholder `<redacted:NAME:LEN>`, where `LEN` is the byte length
/// of the matched text.
fn replacement_for(name: &str, matched: &str) -> String {
    let redacted_len = matched.len();
    let mut marker = String::from("<redacted:");
    marker.push_str(name);
    marker.push(':');
    marker.push_str(&redacted_len.to_string());
    marker.push('>');
    marker
}
/// Replaces every secret-looking token in `input` with a placeholder.
///
/// The built-in patterns are applied first, then the calling thread's custom patterns.
/// Patterns run one after another, each against the output of the previous one.
///
/// When `placeholder` equals `crate::redact::REDACTED_PLACEHOLDER`, each match is
/// replaced with a named marker `<redacted:NAME:LEN>`; otherwise `placeholder` is
/// inserted verbatim.
///
/// Returns `Cow::Borrowed(input)` when nothing changed. Empty inputs and inputs longer
/// than `MAX_SCAN_INPUT_BYTES` are returned as-is **without being scanned**.
///
/// When the text was changed, one audit event per pattern name is emitted, ordered by
/// name. Patterns that share a name contribute to a single combined event.
pub fn scan_secret_patterns<'a>(input: &'a str, placeholder: &str) -> Cow<'a, str> {
    if input.is_empty() || input.len() > MAX_SCAN_INPUT_BYTES {
        return Cow::Borrowed(input);
    }
    let use_named_placeholder = placeholder == crate::redact::REDACTED_PLACEHOLDER;
    let mut owned: Option<String> = None;
    let mut audit_events: BTreeMap<&'static str, RedactionEvent> = BTreeMap::new();
    // Snapshot the custom patterns so the thread-local borrow is not held while scanning.
    let custom: Vec<NamedPattern> = CUSTOM_PATTERNS.with(|cell| cell.borrow().clone());
    for pattern in DEFAULT_PATTERNS.iter().chain(custom.iter()) {
        let target: &str = owned.as_deref().unwrap_or(input);
        let mut matches = pattern.regex.find_iter(target).peekable();
        if matches.peek().is_none() {
            continue;
        }
        // Rebuild the text in one forward pass: copy the gap before each match, then the
        // placeholder. This avoids shifting the tail of the buffer once per match.
        let mut rebuilt = String::with_capacity(target.len());
        let mut cursor = 0;
        let mut match_count = 0usize;
        let mut bytes_redacted = 0usize;
        for found in matches {
            rebuilt.push_str(&target[cursor..found.start()]);
            if use_named_placeholder {
                rebuilt.push_str(&replacement_for(pattern.name, found.as_str()));
            } else {
                rebuilt.push_str(placeholder);
            }
            cursor = found.end();
            match_count += 1;
            bytes_redacted += found.end() - found.start();
        }
        rebuilt.push_str(&target[cursor..]);
        // Accumulate rather than overwrite: two patterns may share a name (for example
        // a custom pattern registered twice), and their counts must not be lost.
        audit_events
            .entry(pattern.name)
            .and_modify(|event| {
                event.match_count += match_count;
                event.bytes_redacted += bytes_redacted;
            })
            .or_insert_with(|| RedactionEvent {
                pattern_name: pattern.name.to_string(),
                match_count,
                bytes_redacted,
            });
        owned = Some(rebuilt);
    }
    let result = match owned {
        // A replacement identical to the matched text leaves the input unchanged.
        Some(value) if value == input => Cow::Borrowed(input),
        Some(value) => Cow::Owned(value),
        None => Cow::Borrowed(input),
    };
    if matches!(result, Cow::Owned(_)) {
        let events: Vec<RedactionEvent> = audit_events.into_values().collect();
        emit_audit(&events);
    }
    result
}
// Unit tests for the scanner, the custom-pattern registry and the audit plumbing.
// All redaction state is thread-local, so each test resets it for its own thread first.
#[cfg(test)]
mod tests {
use super::*;
// Resets this thread's custom patterns, audit sink and audit ring.
fn run_clean() {
clear_custom_patterns();
install_audit_sink(None);
clear_audit_ring();
}
// Text without secrets must come back borrowed (no allocation).
#[test]
fn returns_borrowed_when_clean() {
run_clean();
let out = scan_secret_patterns("just plain text", crate::redact::REDACTED_PLACEHOLDER);
assert!(matches!(out, Cow::Borrowed(_)));
}
// With the standard placeholder, matches become `<redacted:NAME:LEN>` markers.
#[test]
fn replaces_aws_and_github_tokens_with_named_placeholder() {
run_clean();
let input = "AKIAABCDEFGHIJKLMNOP and ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
let rendered = out.into_owned();
assert!(rendered.contains("<redacted:aws_access_key:20>"));
assert!(rendered.contains("<redacted:github_token:40>"));
assert!(!rendered.contains("AKIAABCDEFGHIJKLMNOP"));
}
// Any other placeholder is inserted verbatim instead of a named marker.
#[test]
fn legacy_placeholder_path_still_works_for_url_param_values() {
run_clean();
let input = "AKIAABCDEFGHIJKLMNOP";
let out = scan_secret_patterns(input, "%5Bredacted%5D");
assert!(out.contains("%5Bredacted%5D"));
assert!(!out.contains("AKIAABCDEFGHIJKLMNOP"));
}
// A bearer credential in the middle of a line is redacted; the trailing text survives.
#[test]
fn replaces_bearer_token_inside_text() {
run_clean();
let input = "header: Authorization: Bearer abcDEFghi123_-+/=xyz tail";
let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
assert!(out.contains("<redacted:bearer_token:"));
assert!(!out.contains("abcDEFghi123_-+/=xyz"));
assert!(out.contains("tail"));
}
// A three-segment `eyJ…` token is recognised as a JWT.
#[test]
fn replaces_jwt_tokens() {
run_clean();
let input = "token=eyJabcd.eyJefgh.signature_pad here";
let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
assert!(out.contains("<redacted:jwt:"));
assert!(!out.contains("eyJabcd.eyJefgh.signature_pad"));
}
// Custom patterns are listed by name, take part in scans, and can be cleared.
#[test]
fn custom_pattern_redacts_and_is_introspectable() {
run_clean();
register_custom_pattern("acme_token", r"\bACME-[A-Z0-9]{8}\b").unwrap();
assert_eq!(custom_pattern_names(), vec!["acme_token".to_string()]);
let out = scan_secret_patterns(
"header ACME-12345678 trailer",
crate::redact::REDACTED_PLACEHOLDER,
);
assert!(
out.contains("<redacted:acme_token:13>"),
"expected acme_token redaction, got: {out}"
);
clear_custom_patterns();
assert!(custom_pattern_names().is_empty());
}
// Two AWS keys and one GitHub token yield two events (one per pattern), delivered to
// both the sink and the ring.
#[test]
fn audit_sink_receives_one_event_per_matching_pattern() {
use std::cell::RefCell;
use std::rc::Rc;
run_clean();
let captured: Rc<RefCell<Vec<RedactionEvent>>> = Rc::new(RefCell::new(Vec::new()));
let sink_captured = captured.clone();
install_audit_sink(Some(Rc::new(move |event| {
sink_captured.borrow_mut().push(event.clone());
})));
let input =
"AKIAABCDEFGHIJKLMNOP AKIA0000000000000000 ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let out = scan_secret_patterns(input, crate::redact::REDACTED_PLACEHOLDER);
assert!(matches!(out, Cow::Owned(_)));
let events = captured.borrow();
assert_eq!(events.len(), 2);
let by_name: BTreeMap<&str, &RedactionEvent> = events
.iter()
.map(|event| (event.pattern_name.as_str(), event))
.collect();
assert_eq!(by_name.get("aws_access_key").unwrap().match_count, 2);
assert_eq!(by_name.get("github_token").unwrap().match_count, 1);
// Release the borrow on `captured` before uninstalling the sink that owns a clone of it.
drop(events);
install_audit_sink(None);
let ring = drain_audit_ring();
assert_eq!(ring.len(), 2);
}
// The ring is filled even when no sink is installed, and draining empties it.
#[test]
fn audit_ring_records_events_even_without_a_sink() {
run_clean();
let _ = scan_secret_patterns("AKIAABCDEFGHIJKLMNOP", crate::redact::REDACTED_PLACEHOLDER);
let ring = drain_audit_ring();
assert_eq!(ring.len(), 1);
assert_eq!(ring[0].pattern_name, "aws_access_key");
assert!(drain_audit_ring().is_empty());
}
// Inputs just over the size cap are returned untouched, even though they contain secrets.
#[test]
fn input_above_cap_is_passthrough() {
run_clean();
let huge = "AKIAABCDEFGHIJKLMNOP".repeat(MAX_SCAN_INPUT_BYTES / 20 + 1);
let out = scan_secret_patterns(&huge, crate::redact::REDACTED_PLACEHOLDER);
assert!(matches!(out, Cow::Borrowed(_)));
}
// Guards a subset of the built-in pattern names against accidental renames.
#[test]
fn default_pattern_names_are_stable() {
let names = default_pattern_names();
assert!(names.contains(&"jwt"));
assert!(names.contains(&"github_token"));
assert!(names.contains(&"github_pat_fine"));
assert!(names.contains(&"slack_token"));
assert!(names.contains(&"aws_access_key"));
assert!(names.contains(&"bearer_token"));
}
}