use super::regexes::{
RE_AWS_KEY, RE_BEARER, RE_CONN_STRING, RE_GITHUB_TOKEN, RE_GOOGLE_KEY, RE_JWT, RE_PEM,
RE_SK_PREFIX, RE_SLACK_TOKEN,
};
use super::rules::{apply_regex, redact_env_kv, redact_posix_paths, redact_windows_paths};
use super::types::{MAX_BODY_BYTES, ScrubChange, ScrubResult};
/// Scrubs `text` and packages the outcome as a [`ScrubResult`].
///
/// The result carries the scrubbed text, the list of changes reported by the
/// scrubbing passes, and a one-line summary derived from that list.
pub fn scrub(text: &str) -> ScrubResult {
    let (text, changes) = scrub_inner(text);
    // The summary only borrows `changes`, so build it before the vector is
    // moved into the result.
    let redaction_summary = build_summary(&changes);
    ScrubResult {
        text,
        changes,
        redaction_summary,
    }
}
pub fn scrub_compat(text: &str) -> (String, Vec<ScrubChange>) {
let result = scrub(text);
(result.text, result.changes)
}
fn scrub_inner(text: &str) -> (String, Vec<ScrubChange>) {
let mut result = text.to_string();
let mut changes: Vec<ScrubChange> = Vec::new();
let (r, n) = apply_regex(&result, &RE_PEM, "[REDACTED_PRIVATE_KEY]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "PemPrivateKey",
hint: format!("{n} PEM private-key block(s) redacted"),
});
}
let (r, n) = apply_regex(&result, &RE_BEARER, "[REDACTED_TOKEN]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "BearerToken",
hint: format!("{n} bearer/auth token(s) redacted"),
});
}
let (r, n) = apply_regex(&result, &RE_JWT, "[REDACTED_JWT]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "JwtToken",
hint: format!("{n} JWT string(s) redacted"),
});
}
let (r, n) = apply_regex(&result, &RE_SK_PREFIX, "[REDACTED_API_KEY]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "SkApiKey",
hint: format!("{n} sk-* API key(s) redacted"),
});
}
let (r, n) = apply_regex(&result, &RE_GITHUB_TOKEN, "[REDACTED_GITHUB_TOKEN]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "GithubToken",
hint: format!("{n} GitHub token(s) redacted"),
});
}
let (r, n) = apply_regex(&result, &RE_AWS_KEY, "[REDACTED_AWS_KEY]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "AwsKey",
hint: format!("{n} AWS access key(s) redacted"),
});
}
let (r, n) = apply_regex(&result, &RE_GOOGLE_KEY, "[REDACTED_GOOGLE_KEY]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "GoogleKey",
hint: format!("{n} Google API key(s) redacted"),
});
}
let (r, n) = apply_regex(&result, &RE_SLACK_TOKEN, "[REDACTED_SLACK_TOKEN]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "SlackToken",
hint: format!("{n} Slack token(s) redacted"),
});
}
let (r, n) = apply_regex(&result, &RE_CONN_STRING, "[REDACTED_CONN_STRING]");
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "ConnString",
hint: format!("{n} connection string(s) with credentials redacted"),
});
}
let (r, n) = redact_env_kv(&result);
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "EnvSecret",
hint: format!("{n} key=value secret(s) redacted"),
});
}
let (r, n) = redact_posix_paths(&result);
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "AbsolutePath",
hint: format!("{n} absolute path(s) replaced with ~"),
});
}
let (r, n) = redact_windows_paths(&result);
if n > 0 {
result = r;
changes.push(ScrubChange {
pattern: "WindowsPath",
hint: format!("{n} Windows path(s) replaced with ~"),
});
}
if result.len() > MAX_BODY_BYTES {
let mut boundary = MAX_BODY_BYTES;
while !result.is_char_boundary(boundary) {
boundary -= 1;
}
result.truncate(boundary);
result.push_str("\n\n[... truncated — body exceeded 16 KiB ...]");
changes.push(ScrubChange {
pattern: "Truncation",
hint: format!("body truncated to {MAX_BODY_BYTES} bytes"),
});
}
(result, changes)
}
/// Condenses a change list into a one-line, human-readable summary.
///
/// Changes are bucketed by `pattern` into secrets and paths. Each matching
/// change contributes the number that leads its `hint` (or 1 when the hint
/// does not start with a parseable number). Changes with any other pattern,
/// including `"Truncation"`, contribute nothing, so an empty or
/// truncation-only list yields `"nothing redacted"`.
pub(super) fn build_summary(changes: &[ScrubChange]) -> String {
    const SECRET_PATTERNS: &[&str] = &[
        "BearerToken",
        "JwtToken",
        "SkApiKey",
        "GithubToken",
        "AwsKey",
        "GoogleKey",
        "SlackToken",
        "ConnString",
        "EnvSecret",
        "PemPrivateKey",
    ];
    const PATH_PATTERNS: &[&str] = &["AbsolutePath", "WindowsPath"];

    /// Reads the count from the first whitespace-separated word of a hint,
    /// falling back to 1 when that word is missing or not a number.
    fn leading_count(hint: &str) -> usize {
        hint.split_whitespace()
            .next()
            .and_then(|word| word.parse::<usize>().ok())
            .unwrap_or(1)
    }

    let mut secret_total: usize = 0;
    let mut path_total: usize = 0;
    for change in changes {
        // The two pattern sets are disjoint, so a change lands in at most one
        // bucket.
        if SECRET_PATTERNS.contains(&change.pattern) {
            secret_total += leading_count(&change.hint);
        } else if PATH_PATTERNS.contains(&change.pattern) {
            path_total += leading_count(&change.hint);
        }
    }

    if secret_total == 0 && path_total == 0 {
        "nothing redacted".to_string()
    } else if path_total == 0 {
        format!("{secret_total} secret(s) redacted")
    } else if secret_total == 0 {
        format!("{path_total} path(s) redacted")
    } else {
        format!("{secret_total} secret(s), {path_total} path(s) redacted")
    }
}