use lazy_static::lazy_static;
use regex::bytes::{NoExpand, Regex};
/// Placeholder that `scrub_content` writes in place of a bare `sk-` / `sk_` key.
const REDACTED: &[u8] = b"[REDACTED]";
/// Placeholder that `scrub_content` writes in place of a whole `Authorization` header match.
///
/// NOTE(review): `AUTH_HEADER` ends in `.+$`; `.` never matches `\n` and a multi-line `$` does
/// not consume it, so the line's own `\n` stays in the output and the `\n` here is added on top
/// of it. Every scrubbed header is therefore followed by one extra newline. Confirm that this is
/// intended before relying on the scrubbed output keeping its header/body layout.
const AUTH_REDACTED_LINE: &[u8] = b"[REDACTED]\n";
/// Placeholder that `scrub_content` writes in place of a `Bearer <token>` match; the scheme word
/// is kept (in this fixed spelling) and only the token is hidden.
const BEARER_REDACTED: &[u8] = b"Bearer [REDACTED]";
lazy_static! {
    /// Matches a whole `Authorization` header line: case-insensitive (`i`), with `^` / `$`
    /// anchored per line (`m`). `\s` also matches line breaks, so a value folded onto the
    /// following line is included in the match.
    ///
    /// The header value is matched with `(?-u:.)`, i.e. any byte except `\n`. The
    /// Unicode-aware `.` only matches valid UTF-8 sequences, so a single stray byte such as
    /// `\xff` in the value stopped `.+` short of `$` and the whole header (including non-Bearer
    /// credentials such as `Basic ...`) was neither detected nor scrubbed.
    static ref AUTH_HEADER: Regex = Regex::new(r"(?mi)^\s*Authorization\s*:\s*(?-u:.)+$")
        .expect("AUTH_HEADER is a valid regex");
    /// Matches `Bearer <token>` anywhere in the data, case-insensitively.
    ///
    /// The token is matched with `(?-u:\S)`, i.e. any byte that is not ASCII whitespace, so a
    /// token containing non-UTF-8 bytes is matched to its end instead of being cut off (and
    /// partially left in the output) at the first such byte.
    static ref BEARER_TOKEN: Regex = Regex::new(r"(?i)Bearer\s+(?-u:\S)+")
        .expect("BEARER_TOKEN is a valid regex");
    /// Matches `sk-` or `sk_` followed by at least 20 characters from `[A-Za-z0-9_-]`.
    static ref SK_KEY: Regex = Regex::new(r"sk[-_][A-Za-z0-9_-]{20,}")
        .expect("SK_KEY is a valid regex");
}
/// Returns a copy of `data` with every recognised secret replaced by a placeholder.
///
/// Three passes run in a fixed order, each one over the output of the previous pass:
///
/// 1. `AUTH_HEADER` matches become `AUTH_REDACTED_LINE`,
/// 2. `BEARER_TOKEN` matches become `BEARER_REDACTED`,
/// 3. `SK_KEY` matches become `REDACTED`.
///
/// Replacements are inserted literally (`NoExpand`), so no `$name` expansion is applied to the
/// placeholders. Input without any match is returned byte-for-byte.
pub fn scrub_content(data: &[u8]) -> Vec<u8> {
    let without_auth = AUTH_HEADER.replace_all(data, NoExpand(AUTH_REDACTED_LINE));
    let without_bearer = BEARER_TOKEN.replace_all(&*without_auth, NoExpand(BEARER_REDACTED));
    let scrubbed = SK_KEY.replace_all(&*without_bearer, NoExpand(REDACTED));
    scrubbed.into_owned()
}
/// Reports whether `data` still contains anything that `scrub_content` would redact.
///
/// The patterns are tried in the order `AUTH_HEADER`, `BEARER_TOKEN`, `SK_KEY`, and the search
/// stops at the first one that matches.
pub fn contains_forbidden_patterns(data: &[u8]) -> bool {
    if AUTH_HEADER.is_match(data) {
        return true;
    }
    if BEARER_TOKEN.is_match(data) {
        return true;
    }
    SK_KEY.is_match(data)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when `needle` occurs as a contiguous byte run inside `haystack`.
    ///
    /// The window width is taken from `needle` itself, so a check can never be vacuously false
    /// because a hand-written width disagrees with the literal's length. `needle` must be
    /// non-empty (`windows(0)` panics).
    fn contains(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|w| w == needle)
    }

    #[test]
    fn scrub_redacts_auth_header() {
        let raw = b"Content-Type: application/json\nAuthorization: Bearer sk-secret123\n\n{}";
        let out = scrub_content(raw);
        assert!(!contains(&out, b"sk-secret"));
        assert!(contains(&out, REDACTED));
        assert!(!contains(&out, b"Authorization"));
    }

    #[test]
    fn scrub_redacts_bearer_and_sk() {
        let raw = b"Bearer sk-proj-abc123def456";
        let out = scrub_content(raw);
        assert!(!contains(&out, b"sk-proj"));
        assert!(contains(&out, BEARER_REDACTED) || contains(&out, REDACTED));
    }

    #[test]
    fn scrub_redacts_sk_key() {
        let raw = b"api_key=sk-abcdefghij1234567890xyz";
        let out = scrub_content(raw);
        assert!(!contains(&out, b"sk-abcdefghij"));
        assert!(contains(&out, REDACTED));
    }

    #[test]
    fn contains_forbidden_detects_auth_header() {
        assert!(contains_forbidden_patterns(b"Authorization: Bearer SECRET"));
        assert!(contains_forbidden_patterns(b"authorization: bearer x"));
        assert!(!contains_forbidden_patterns(
            b"Content-Type: application/json"
        ));
    }

    #[test]
    fn contains_forbidden_detects_sk_key() {
        assert!(contains_forbidden_patterns(
            b"sk-abcdefghij1234567890abcdefghij"
        ));
        assert!(contains_forbidden_patterns(b"sk_live_abcdefghij1234567890"));
        assert!(!contains_forbidden_patterns(b"sk- short"));
    }

    #[test]
    fn safe_content_unchanged() {
        let safe = b"{\"method\":\"GET\",\"url\":\"/api\"}";
        let out = scrub_content(safe);
        assert_eq!(&out[..], safe);
        assert!(!contains_forbidden_patterns(safe));
    }

    #[test]
    fn contains_forbidden_patterns_detects_in_invalid_utf8_with_ascii_secret() {
        // The trailing bytes are not valid UTF-8; the ASCII secret must still be found.
        let with_secret = b"Authorization: Bearer SECRET\xff\xfe";
        assert!(
            contains_forbidden_patterns(with_secret),
            "verify must not skip non-UTF8 content that contains ASCII secrets"
        );
    }

    #[test]
    fn scrub_preserves_binary_without_pattern() {
        let binary = [0u8, 1, 2, 0xff, 0xfe, 100];
        let out = scrub_content(&binary);
        assert_eq!(&out[..], &binary[..]);
    }

    #[test]
    fn contains_forbidden_detects_sk_key_at_min_length() {
        let at_boundary = b"sk-abcdefghij1234567890";
        assert_eq!(at_boundary.len(), 23);
        assert!(
            contains_forbidden_patterns(at_boundary),
            "sk-key with exactly 20 chars after prefix must be detected"
        );
    }

    #[test]
    fn contains_forbidden_ignores_sk_key_below_min_length() {
        let below_boundary = b"sk-abcdefghij123456789";
        assert_eq!(below_boundary.len(), 22);
        assert!(
            !contains_forbidden_patterns(below_boundary),
            "sk-key with only 19 chars after prefix must NOT be detected"
        );
    }

    #[test]
    fn contains_forbidden_detects_sk_underscore_at_min_length() {
        let key = b"sk_live_abcdefghij12";
        assert_eq!(key.len(), 20);
        assert!(
            !contains_forbidden_patterns(key),
            "sk_ + 17 chars should not match (need 20 after prefix)"
        );

        // Exactly 20 characters after the `sk_` prefix: the actual lower boundary.
        let key = b"sk_live_abcdefghij12345";
        assert_eq!(key.len(), 23);
        assert!(
            contains_forbidden_patterns(key),
            "sk_ + exactly 20 chars must be detected"
        );

        let key = b"sk_live_abcdefghij1234567890";
        assert!(contains_forbidden_patterns(key));
    }

    #[test]
    fn contains_forbidden_detects_auth_with_null_byte_before() {
        let data = b"\x00Authorization: Bearer secret";
        assert!(
            contains_forbidden_patterns(data),
            "null byte before auth header must not prevent detection"
        );
    }

    #[test]
    fn contains_forbidden_detects_auth_line_even_with_continuation() {
        let standard = b"Authorization: Bearer sk-proj-abcdef0123456789abcd\r\n";
        assert!(contains_forbidden_patterns(standard));

        let obs_fold = b"Authorization:\r\n Bearer sk-proj-abcdef0123456789abcd\r\n";
        assert!(
            contains_forbidden_patterns(obs_fold),
            "Bearer token on continuation line must still be caught by BEARER_TOKEN regex"
        );
    }

    #[test]
    fn contains_forbidden_detects_bearer_on_standalone_line() {
        let data = b" Bearer sk-proj-abcdef0123456789abcd\r\n";
        assert!(
            contains_forbidden_patterns(data),
            "BEARER_TOKEN regex must catch standalone bearer tokens"
        );
    }

    #[test]
    fn contains_forbidden_does_not_detect_base64_encoded_key() {
        let b64_key = b"api_key=c2stcHJvai1hYmMxMjNkZWY0NTZnaGlq";
        assert!(
            !contains_forbidden_patterns(b64_key),
            "base64-encoded keys are outside scrub scope (acknowledged limitation)"
        );
    }
}