use regex::Regex;
use serde::Serialize;
use std::sync::OnceLock;
pub mod log;
pub use log::{format_log, write_log_if_any};
/// Identifier of the built-in pattern set.
///
/// NOTE(review): not referenced in this part of the file; presumably a version tag that
/// consumers report next to scrub results — confirm against callers.
pub const PATTERN_SET: &str = "v1";
/// Placeholder text that `scrub_text` writes in place of every redacted span.
pub const REDACTION_TOKEN: &str = "[REDACTED]";
/// One region of the input that `scrub_text` replaced with [`REDACTION_TOKEN`].
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct Match {
/// Identifier of the pattern this match is attributed to (e.g. `"github-token"`).
pub pattern_id: String,
/// Byte offset of the replaced text within the original, unredacted input.
pub offset: usize,
/// Length in bytes of the replaced text within the original input.
pub length: usize,
/// Length in bytes of the replacement text, i.e. `REDACTION_TOKEN.len()`.
pub redaction_length: usize,
}
/// Outcome of running `scrub_text` over one input string.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ScrubResult {
/// Copy of the input in which every recorded match has been replaced by
/// [`REDACTION_TOKEN`].
pub redacted: String,
/// The replaced regions, ordered by ascending offset in the original input.
/// Hits that overlap an earlier recorded region are not listed separately.
pub matches: Vec<Match>,
}
impl ScrubResult {
    /// Returns the identifiers of all patterns that appear in `matches`.
    ///
    /// The list is sorted lexicographically and contains each identifier once,
    /// regardless of how many matches that pattern produced. It is empty when
    /// there are no matches.
    pub fn triggered_patterns(&self) -> Vec<String> {
        // Work on borrowed ids first so that only the surviving, unique ids are
        // turned into owned strings.
        let mut unique: Vec<&str> = Vec::with_capacity(self.matches.len());
        for hit in &self.matches {
            unique.push(hit.pattern_id.as_str());
        }
        unique.sort_unstable();
        unique.dedup();
        unique.into_iter().map(String::from).collect()
    }
}
/// A single built-in detection rule.
struct Pattern {
/// Stable identifier; copied into `Match::pattern_id` for every hit of this rule.
id: &'static str,
/// Compiled regular expression used to locate candidate secrets.
regex: Regex,
/// Index of the capture group whose span is redacted. `scrub_text` passes it to
/// `Captures::get`; `0` selects the entire regex match, `1` the first group.
capture: usize,
}
/// Returns the built-in detection patterns, compiling them on first use.
///
/// The table is built once and cached in a process-wide [`OnceLock`]; every call
/// returns the same slice. The order of the entries is part of the observable
/// behaviour: `pattern_ids` reports it, and `scrub_text` collects hits in this order.
///
/// # Panics
///
/// Panics on first use if one of the built-in regular expressions does not compile,
/// which would be a programming error in this table.
fn patterns() -> &'static [Pattern] {
    /// Compiles one table entry, naming the offending pattern if compilation fails.
    fn compile(id: &'static str, source: &str, capture: usize) -> Pattern {
        let regex = Regex::new(source).unwrap_or_else(|err| {
            panic!("built-in pattern `{}` failed to compile: {}", id, err)
        });
        Pattern { id, regex, capture }
    }

    static P: OnceLock<Vec<Pattern>> = OnceLock::new();
    P.get_or_init(|| {
        vec![
            compile("github-token", r"gh[opusr]_[A-Za-z0-9_]{36,}", 0),
            compile("gitlab-token", r"glpat-[A-Za-z0-9_\-]{20,}", 0),
            compile("bitbucket-app-password", r"ATBB[A-Za-z0-9]{16,}", 0),
            compile(
                "aws-access-key-id",
                r"(?:AKIA|ASIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|A3T[A-Z0-9])[A-Z0-9]{16}",
                0,
            ),
            // Only the value (capture group 1) is redacted, so the key name stays readable.
            compile(
                "generic-secret-kv",
                r#"(?i)(?:secret|token|password|api[_-]?key)\s*[:=]\s*["']?([A-Za-z0-9_\-\.+/=]{12,})["']?"#,
                1,
            ),
            // The algorithm prefix is optional (`*`, not `+`) so that the unlabelled
            // PKCS#8 form `-----BEGIN PRIVATE KEY-----` is caught as well as
            // `RSA`/`EC`/`OPENSSH`/`ENCRYPTED` private keys.
            compile(
                "pem-private-key",
                r"(?s)-----BEGIN [A-Z ]*PRIVATE KEY-----.*?-----END [A-Z ]*PRIVATE KEY-----",
                0,
            ),
        ]
    })
}
/// Lists the identifiers of all built-in patterns, in the order in which the
/// patterns are registered.
pub fn pattern_ids() -> Vec<&'static str> {
    let registry = patterns();
    let mut ids = Vec::with_capacity(registry.len());
    ids.extend(registry.iter().map(|entry| entry.id));
    ids
}
/// Replaces every secret found in `input` with [`REDACTION_TOKEN`].
///
/// Every built-in pattern is run over the whole input and the span of its configured
/// capture group is collected. Candidate spans are ordered by start offset, widest
/// first; spans with the same start and width keep pattern-registration order.
/// Spans that overlap are folded into one redaction that covers their union, so no
/// part of any detected secret survives in the output. The folded redaction is
/// attributed to the first span in that order.
///
/// # Returns
///
/// A [`ScrubResult`] whose `redacted` field is the scrubbed text and whose `matches`
/// field lists the redacted regions in ascending offset order. Offsets and lengths
/// are byte positions in the original `input`. When nothing matches, `redacted`
/// equals `input` and `matches` is empty.
pub fn scrub_text(input: &str) -> ScrubResult {
    let mut spans: Vec<(usize, usize, &'static str)> = Vec::new();
    for p in patterns() {
        spans.extend(p.regex.captures_iter(input).filter_map(|caps| {
            // An optional group that did not participate yields `None`; skip that hit.
            caps.get(p.capture).map(|m| (m.start(), m.end(), p.id))
        }));
    }

    // The sort must stay stable: ties are resolved by pattern-registration order.
    spans.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| (b.1 - b.0).cmp(&(a.1 - a.0))));

    let mut kept: Vec<(usize, usize, &'static str)> = Vec::with_capacity(spans.len());
    for span in spans {
        if let Some(last) = kept.last_mut() {
            if span.0 < last.1 {
                // Overlap: grow the kept span instead of discarding the newcomer.
                // Discarding it would leave the part that sticks out past `last.1`
                // unredacted.
                last.1 = last.1.max(span.1);
                continue;
            }
        }
        kept.push(span);
    }

    // `kept` is sorted and non-overlapping, so the output can be assembled in one
    // forward pass over the input.
    let mut redacted = String::with_capacity(input.len());
    let mut matches = Vec::with_capacity(kept.len());
    let mut cursor = 0;
    for (start, end, id) in kept {
        redacted.push_str(&input[cursor..start]);
        redacted.push_str(REDACTION_TOKEN);
        cursor = end;
        matches.push(Match {
            pattern_id: id.to_string(),
            offset: start,
            length: end - start,
            redaction_length: REDACTION_TOKEN.len(),
        });
    }
    redacted.push_str(&input[cursor..]);

    ScrubResult { redacted, matches }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scrubs `payload`, checks that exactly one match was recorded and that it is
    /// attributed to `expected_id`, then hands the result back for further checks.
    fn scrub_single(payload: &str, expected_id: &str) -> ScrubResult {
        let outcome = scrub_text(payload);
        assert_eq!(outcome.matches.len(), 1);
        assert_eq!(outcome.matches[0].pattern_id, expected_id);
        outcome
    }

    /// Scrubs `payload` and fails, listing the matches, if anything was detected.
    fn assert_clean(payload: &str) {
        let outcome = scrub_text(payload);
        assert!(
            outcome.matches.is_empty(),
            "got matches: {:?}",
            outcome.matches
        );
    }

    // --- One positive case per built-in pattern ---------------------------------

    #[test]
    fn detects_github_token() {
        let outcome = scrub_single(
            "auth: ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa next",
            "github-token",
        );
        assert!(outcome.redacted.contains(REDACTION_TOKEN));
        assert!(!outcome.redacted.contains("ghp_"));
    }

    #[test]
    fn detects_gitlab_pat() {
        let outcome = scrub_single(
            "url: https://oauth2:glpat-AAAAAAAAAAAAAAAAAAAA@example.com/x.git",
            "gitlab-token",
        );
        assert!(!outcome.redacted.contains("glpat-"));
    }

    #[test]
    fn detects_bitbucket_app_password() {
        scrub_single("pass=ATBB1234567890abcdefXYZ end", "bitbucket-app-password");
    }

    #[test]
    fn detects_aws_access_key_id() {
        scrub_single(
            "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE end",
            "aws-access-key-id",
        );
    }

    /// Only the value is replaced; the key name and the quotes stay in place.
    #[test]
    fn detects_generic_secret_kv_value_only() {
        let outcome = scrub_single(
            "password: \"supersecretvalue12\" trailing",
            "generic-secret-kv",
        );
        assert!(outcome.redacted.starts_with("password: \"[REDACTED]\""));
    }

    /// The whole armored block, header and footer included, collapses into one token.
    #[test]
    fn detects_pem_private_key_block() {
        let outcome = scrub_single(
            "before\n-----BEGIN RSA PRIVATE KEY-----\nlines\nlines\n-----END RSA PRIVATE KEY-----\nafter",
            "pem-private-key",
        );
        assert!(outcome.redacted.contains("before\n[REDACTED]\nafter"));
    }

    // --- Negative cases ---------------------------------------------------------

    #[test]
    fn negative_short_token_not_matched() {
        assert_clean("ghp_tooshort and glpat-short and no_aws_here");
    }

    #[test]
    fn negative_random_word_not_secret() {
        assert_clean("the password is in the doc");
    }

    // --- Whole-pipeline behaviour -----------------------------------------------

    #[test]
    fn end_to_end_all_patterns_in_one_payload() {
        let payload = "\
A: ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
B: glpat-AAAAAAAAAAAAAAAAAAAA
C: ATBB1234567890abcdefXYZ
D: AKIAIOSFODNN7EXAMPLE
E: secret=topsecretvalue42
F: -----BEGIN OPENSSH PRIVATE KEY-----\nbody\n-----END OPENSSH PRIVATE KEY-----
";
        let outcome = scrub_text(payload);

        // `triggered_patterns` returns the ids sorted, hence the alphabetical order.
        assert_eq!(
            outcome.triggered_patterns(),
            [
                "aws-access-key-id",
                "bitbucket-app-password",
                "generic-secret-kv",
                "github-token",
                "gitlab-token",
                "pem-private-key",
            ]
        );

        for fragment in [
            "ghp_",
            "glpat-",
            "AKIA",
            "ATBB",
            "topsecretvalue42",
            "PRIVATE KEY",
        ] {
            assert!(
                !outcome.redacted.contains(fragment),
                "fragment {:?} survived in: {}",
                fragment,
                outcome.redacted
            );
        }
    }

    #[test]
    fn empty_input_returns_empty_result() {
        let ScrubResult { redacted, matches } = scrub_text("");
        assert!(matches.is_empty());
        assert_eq!(redacted, "");
    }

    /// A value that two patterns both match must be redacted exactly once.
    #[test]
    fn overlapping_matches_prefer_earliest_widest() {
        let outcome = scrub_text("password: glpat-AAAAAAAAAAAAAAAAAAAA");
        let tokens_written = outcome.redacted.matches(REDACTION_TOKEN).count();
        assert_eq!(tokens_written, 1, "redacted: {}", outcome.redacted);
    }

    #[test]
    fn pattern_ids_listing_is_stable() {
        assert_eq!(
            pattern_ids(),
            [
                "github-token",
                "gitlab-token",
                "bitbucket-app-password",
                "aws-access-key-id",
                "generic-secret-kv",
                "pem-private-key",
            ]
        );
    }
}