use once_cell::sync::Lazy;
use regex::Regex;
/// Marker text that `redact` substitutes for every secret it finds.
const REPLACEMENT: &str = "[REDACTED]";
/// Outcome of a redaction pass: the scrubbed text plus a summary of what matched.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RedactReport {
/// The text after redaction; as produced by `redact`, every detected secret
/// has been replaced by the redaction marker.
pub redacted: String,
/// Match summaries; as produced by `redact`, there is one entry per pattern
/// that matched at least once, in the order the patterns were applied.
pub hits: Vec<RedactHit>,
}
impl RedactReport {
    /// Returns `true` when no hit was recorded, i.e. `hits` is empty.
    pub fn is_clean(&self) -> bool {
        self.hits.is_empty()
    }

    /// Returns the distinct kind names present in `hits`, in ascending
    /// lexicographic order.
    pub fn kinds(&self) -> Vec<String> {
        // An ordered set yields the same result as sort + dedup: each name
        // appears once, in byte-wise string order.
        let distinct: std::collections::BTreeSet<&str> =
            self.hits.iter().map(|hit| hit.kind.as_str()).collect();
        distinct.into_iter().map(String::from).collect()
    }
}
/// One entry of a [`RedactReport`]: a secret kind and how often it was matched.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RedactHit {
/// Name of the pattern that matched (for example `"github_token"`).
pub kind: String,
/// Number of matches counted for that pattern.
pub count: usize,
}
/// A named secret detector: a label paired with the regex that recognizes it.
struct Pattern {
/// Label copied into `RedactHit::kind` when this pattern matches.
kind: &'static str,
/// Compiled expression describing the textual form of the secret.
regex: Regex,
}
/// Secret detectors, compiled once on first use.
///
/// `redact` applies these in order, each one to the output of the previous
/// one, so order decides which label an ambiguous token receives:
///
/// * `anthropic_api_key` must come before `openai_api_key`, because the
///   OpenAI expression (`sk-` followed by 20+ key characters) also matches
///   `sk-ant-…` keys and would otherwise claim them under the wrong kind.
/// * `bearer_token` comes before `jwt`, so a JWT carried in a `Bearer …`
///   header is reported as a bearer token.
///
/// The `unwrap` calls only fire if one of these literal expressions is
/// malformed, which is a programming error rather than a runtime condition.
static PATTERNS: Lazy<Vec<Pattern>> = Lazy::new(|| {
    vec![
        // More specific `sk-ant-` prefix first; see the ordering note above.
        Pattern {
            kind: "anthropic_api_key",
            regex: Regex::new(r"sk-ant-[A-Za-z0-9_\-]{20,}").unwrap(),
        },
        Pattern {
            kind: "openai_api_key",
            regex: Regex::new(r"sk-(?:proj-)?[A-Za-z0-9_\-]{20,}").unwrap(),
        },
        Pattern {
            kind: "openai_pk_key",
            regex: Regex::new(r"\bpk-[A-Za-z0-9_\-]{20,}").unwrap(),
        },
        Pattern {
            kind: "github_token",
            regex: Regex::new(r"\bgh[pousr]_[A-Za-z0-9]{30,}").unwrap(),
        },
        Pattern {
            kind: "slack_bot_token",
            regex: Regex::new(r"\bxox[baprs]-[A-Za-z0-9-]{10,}").unwrap(),
        },
        Pattern {
            kind: "google_api_key",
            regex: Regex::new(r"\bAIza[0-9A-Za-z_\-]{35}").unwrap(),
        },
        Pattern {
            kind: "aws_access_key_id",
            regex: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").unwrap(),
        },
        Pattern {
            kind: "bearer_token",
            regex: Regex::new(r"(?i)Bearer\s+[A-Za-z0-9_\-\.=]{20,}").unwrap(),
        },
        Pattern {
            kind: "jwt",
            regex: Regex::new(
                r"\beyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\b",
            )
            .unwrap(),
        },
    ]
});
/// Replaces every recognized secret in `text` with the `REPLACEMENT` marker.
///
/// The detectors in `PATTERNS` are applied one after another, each to the
/// output of the previous one, so a span already replaced by an earlier
/// pattern cannot be matched again by a later one.
///
/// # Returns
///
/// A [`RedactReport`] whose `redacted` field holds the scrubbed text and
/// whose `hits` field lists, in pattern order, each kind that matched at
/// least once together with its match count. When nothing matches, `redacted`
/// equals `text` and `hits` is empty.
pub fn redact(text: &str) -> RedactReport {
    let mut current = text.to_string();
    let mut hits = Vec::new();
    for pattern in PATTERNS.iter() {
        // Count first: patterns with no match skip the replacement entirely
        // and contribute no entry to the report.
        let count = pattern.regex.find_iter(&current).count();
        if count == 0 {
            continue;
        }
        current = pattern
            .regex
            .replace_all(&current, REPLACEMENT)
            .into_owned();
        hits.push(RedactHit {
            kind: pattern.kind.to_string(),
            count,
        });
    }
    RedactReport {
        redacted: current,
        hits,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `kind` is among the distinct kinds reported for `outcome`.
    fn reports_kind(outcome: &RedactReport, kind: &str) -> bool {
        outcome.kinds().iter().any(|name| name == kind)
    }

    /// Text without secrets passes through untouched and yields no hits.
    #[test]
    fn redact_returns_unchanged_text_when_clean() {
        let outcome = redact("hello world");
        assert_eq!(outcome.redacted, "hello world");
        assert!(outcome.is_clean());
        assert!(outcome.kinds().is_empty());
    }

    /// A single OpenAI-style key is replaced and counted once.
    #[test]
    fn redact_strips_openai_key() {
        let input = "use sk-abc1234567890DEFGHIJ for the call";
        let outcome = redact(input);
        assert!(outcome.redacted.contains(REPLACEMENT));
        assert!(!outcome.redacted.contains("sk-abc"));
        assert_eq!(outcome.kinds(), vec!["openai_api_key"]);
        assert_eq!(outcome.hits[0].count, 1);
    }

    /// An `sk-ant-` key is attributed to exactly one kind.
    #[test]
    fn redact_strips_anthropic_key_first_when_overlap() {
        let input = "key=sk-ant-abc1234567890DEFGHIJ done";
        let outcome = redact(input);
        assert_eq!(outcome.kinds().len(), 1, "{:?}", outcome.kinds());
        assert!(outcome.redacted.contains(REPLACEMENT));
    }

    /// A GitHub personal access token is detected.
    #[test]
    fn redact_strips_github_token() {
        let input = "token=ghp_abcdefghijklmnop1234567890ABCDEF rest";
        let outcome = redact(input);
        assert!(outcome.redacted.contains(REPLACEMENT));
        assert_eq!(outcome.kinds(), vec!["github_token"]);
    }

    /// The bearer credential is removed while the header name is kept.
    #[test]
    fn redact_strips_bearer_token_keeping_surrounding_text() {
        let input = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 ok";
        let outcome = redact(input);
        assert!(outcome.redacted.contains("Authorization:"));
        assert!(outcome.redacted.contains(REPLACEMENT));
        assert!(!outcome.redacted.contains("eyJhbGc"));
        assert!(reports_kind(&outcome, "bearer_token"));
    }

    /// A Slack bot token is detected.
    #[test]
    fn redact_strips_slack_token() {
        let input = "send to xoxb-12345678901-aBcDeFgHiJkLmN done";
        let outcome = redact(input);
        assert!(outcome.redacted.contains(REPLACEMENT));
        assert_eq!(outcome.kinds(), vec!["slack_bot_token"]);
    }

    /// A Google API key is detected.
    #[test]
    fn redact_strips_google_api_key() {
        let input = "key=AIzaSy0123456789ABCDEF0123456789ABCDEF012 rest";
        let outcome = redact(input);
        assert!(outcome.redacted.contains(REPLACEMENT));
        assert_eq!(outcome.kinds(), vec!["google_api_key"]);
    }

    /// An AWS access key id is detected.
    #[test]
    fn redact_strips_aws_access_key() {
        let input = "AKIAIOSFODNN7EXAMPLE is the key";
        let outcome = redact(input);
        assert!(outcome.redacted.contains(REPLACEMENT));
        assert_eq!(outcome.kinds(), vec!["aws_access_key_id"]);
    }

    /// A JWT outside of a bearer header is reported under the `jwt` kind.
    #[test]
    fn redact_strips_jwt_when_standalone() {
        let input = "tok=eyJabc1234567890.eyJpYXQiOjE3MDA.signaturE12345 done";
        let outcome = redact(input);
        assert!(outcome.redacted.contains(REPLACEMENT));
        assert!(reports_kind(&outcome, "jwt"));
    }

    /// Two secrets of the same kind produce one hit with a count of two.
    #[test]
    fn redact_records_repeat_count_for_same_pattern() {
        let input = "first=sk-aaa1234567890ABCDEFGHIJ second=sk-bbb1234567890ABCDEFGHIJ";
        let outcome = redact(input);
        assert_eq!(outcome.hits.len(), 1);
        assert_eq!(outcome.hits[0].kind, "openai_api_key");
        assert_eq!(outcome.hits[0].count, 2);
    }

    /// Secrets of different kinds in one text are all found and replaced.
    #[test]
    fn redact_handles_multiple_kinds_in_one_text() {
        let input = "use sk-abc1234567890ABCDEFGHIJ and ghp_abcdefghijklmnop1234567890ABCDEF";
        let outcome = redact(input);
        assert!(reports_kind(&outcome, "openai_api_key"));
        assert!(reports_kind(&outcome, "github_token"));
        assert!(outcome.redacted.matches(REPLACEMENT).count() >= 2);
    }

    /// Prefix look-alikes that are too short are left alone.
    #[test]
    fn redact_does_not_match_short_obvious_non_secrets() {
        let input = "id=sk-abc1 short=ghp_abc";
        let outcome = redact(input);
        assert!(outcome.is_clean());
        assert_eq!(outcome.redacted, input);
    }

    /// Non-ASCII text around a secret survives redaction intact.
    #[test]
    fn redact_does_not_corrupt_unicode() {
        let input = "测试 sk-abc1234567890DEFGHIJ 完成";
        let outcome = redact(input);
        assert!(outcome.redacted.contains("测试"));
        assert!(outcome.redacted.contains("完成"));
        assert!(outcome.redacted.contains(REPLACEMENT));
    }

    /// `kinds` collapses duplicate entries and returns names in sorted order.
    #[test]
    fn kinds_sorted_and_deduped() {
        let hit = |kind: &str, count: usize| RedactHit {
            kind: kind.into(),
            count,
        };
        let outcome = RedactReport {
            redacted: "[REDACTED]".into(),
            hits: vec![
                hit("github_token", 1),
                hit("github_token", 2),
                hit("openai_api_key", 1),
            ],
        };
        assert_eq!(outcome.kinds(), vec!["github_token", "openai_api_key"]);
    }
}