use std::io::{self, Read, Write};
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use base64::{engine::general_purpose, Engine as _};
use percent_encoding::{percent_encode, NON_ALPHANUMERIC};
use crate::error::{RedactionError, SecretshError};
/// Buffer size, in bytes (64 KiB), used by `Redactor::redact_stream` when reading its input.
const STREAM_BUF_SIZE: usize = 65_536;
/// Replaces occurrences of known secret values — raw and in several encoded forms — with
/// `[REDACTED_…]` labels.
///
/// NOTE(review): no `Debug` derive — presumably deliberate, since the automaton is built from
/// the secret bytes themselves; confirm before adding one.
pub struct Redactor {
    /// Multi-pattern matcher over every registered pattern; `None` when no pattern was
    /// registered (no secrets, or only empty ones).
    automaton: Option<AhoCorasick>,
    /// Replacement label bytes, indexed by the automaton's pattern id: `replacements[i]` is
    /// what a match of pattern `i` is replaced with.
    replacements: Vec<Vec<u8>>,
}
impl Redactor {
    /// Builds a redactor from `(key, value)` secret pairs.
    ///
    /// For every non-empty `value` the following byte patterns are registered, each with its
    /// own replacement label:
    ///
    /// * the raw bytes → `[REDACTED_{key}]`
    /// * standard base64 (padded) → `[REDACTED_{key}_B64]`
    /// * URL-safe base64 (padded) → `[REDACTED_{key}_B64URL]`
    /// * percent-encoding of every non-alphanumeric byte → `[REDACTED_{key}_URL]`
    /// * lowercase and uppercase hex → `[REDACTED_{key}_HEX]`
    ///
    /// Empty values are skipped. A pattern that is byte-identical to one already registered
    /// (for example URL-safe base64 that equals the standard form) is skipped too, so the
    /// earliest registration keeps its label.
    ///
    /// Matching is leftmost-longest: when several patterns start at the same position the
    /// longest one wins, so a short secret can never shadow a longer secret that shares its
    /// prefix and leave the remainder of the longer one in the output.
    ///
    /// # Errors
    ///
    /// Returns `SecretshError::Redaction(RedactionError::PatternBuildFailed { .. })` when the
    /// Aho-Corasick automaton cannot be built from the patterns.
    pub fn new(secrets: &[(&str, &[u8])]) -> Result<Self, SecretshError> {
        let mut patterns: Vec<Vec<u8>> = Vec::new();
        let mut replacements: Vec<Vec<u8>> = Vec::new();

        for &(key, value) in secrets {
            if value.is_empty() {
                continue;
            }

            // (label suffix, pattern bytes); the raw form comes first so that an encoding
            // which leaves the value unchanged is reported under the plain label.
            let variants: [(&str, Vec<u8>); 6] = [
                ("", value.to_vec()),
                ("_B64", general_purpose::STANDARD.encode(value).into_bytes()),
                ("_B64URL", general_purpose::URL_SAFE.encode(value).into_bytes()),
                (
                    "_URL",
                    percent_encode(value, NON_ALPHANUMERIC)
                        .to_string()
                        .into_bytes(),
                ),
                ("_HEX", hex::encode(value).into_bytes()),
                ("_HEX", hex::encode_upper(value).into_bytes()),
            ];

            for (suffix, pattern) in variants {
                // Identical patterns would only compete with each other; keep the first one
                // so the chosen label never depends on the automaton's tie-breaking.
                if patterns.contains(&pattern) {
                    continue;
                }
                replacements.push(format!("[REDACTED_{key}{suffix}]").into_bytes());
                patterns.push(pattern);
            }
        }

        if patterns.is_empty() {
            return Ok(Self {
                automaton: None,
                replacements,
            });
        }

        let automaton = AhoCorasickBuilder::new()
            .match_kind(MatchKind::LeftmostLongest)
            .build(&patterns)
            .map_err(|e| {
                SecretshError::Redaction(RedactionError::PatternBuildFailed {
                    reason: e.to_string(),
                })
            })?;

        Ok(Self {
            automaton: Some(automaton),
            replacements,
        })
    }

    /// Returns `true` when at least one pattern is registered, i.e. redaction can change input.
    #[inline]
    pub fn has_patterns(&self) -> bool {
        self.automaton.is_some()
    }

    /// Returns a copy of `input` in which every non-overlapping pattern match has been
    /// replaced by that pattern's label.
    ///
    /// Without registered patterns the input is returned unchanged.
    pub fn redact_bytes(&self, input: &[u8]) -> Vec<u8> {
        let Some(ac) = &self.automaton else {
            return input.to_vec();
        };

        let mut output = Vec::with_capacity(input.len());
        let mut last_end = 0usize;
        for mat in ac.find_iter(input) {
            // Literal bytes between the previous match and this one, then the label.
            output.extend_from_slice(&input[last_end..mat.start()]);
            output.extend_from_slice(&self.replacements[mat.pattern().as_usize()]);
            last_end = mat.end();
        }
        output.extend_from_slice(&input[last_end..]);
        output
    }

    /// Redacts `input` and returns the result as a `String`.
    ///
    /// The redacted bytes are converted lossily: any byte sequence that is not valid UTF-8
    /// after redaction is replaced with U+FFFD.
    pub fn redact_str(&self, input: &str) -> String {
        let redacted = self.redact_bytes(input.as_bytes());
        String::from_utf8_lossy(&redacted).into_owned()
    }

    /// Copies `input` to `output`, redacting on the fly, and returns the number of bytes
    /// written.
    ///
    /// The stream is processed in chunks of `STREAM_BUF_SIZE` bytes, so memory use is bounded
    /// regardless of input length. To still catch matches that straddle a chunk boundary, the
    /// last `max_pattern_len - 1` bytes of the data seen so far are held back until more input
    /// (or end of input) arrives. The bytes written are the same as `redact_bytes` would
    /// produce for the whole input.
    ///
    /// `output` is not flushed.
    ///
    /// # Errors
    ///
    /// Propagates any error from reading `input` or writing `output` (interrupted reads are
    /// retried). When an error is returned, part of the redacted data may already have been
    /// written.
    pub fn redact_stream(
        &self,
        input: &mut dyn Read,
        output: &mut dyn Write,
    ) -> Result<u64, io::Error> {
        let Some(ac) = &self.automaton else {
            // Nothing to redact: plain copy.
            return io::copy(input, output);
        };

        // A match that begins inside the final `holdback` bytes of the window could still be
        // extended (or replaced by a longer one) by bytes we have not read yet.
        let holdback = ac.max_pattern_len().saturating_sub(1);
        let mut window: Vec<u8> = Vec::with_capacity(STREAM_BUF_SIZE + holdback);
        let mut chunk = vec![0u8; STREAM_BUF_SIZE];
        let mut written = 0u64;

        loop {
            let n = match input.read(&mut chunk) {
                Ok(n) => n,
                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            let eof = n == 0;
            window.extend_from_slice(&chunk[..n]);

            // Every pattern that could start before `settled` lies entirely inside the
            // window, so matches starting there are final. At end of input all of it is.
            let settled = if eof {
                window.len()
            } else {
                window.len().saturating_sub(holdback)
            };

            let mut consumed = 0usize;
            for mat in ac.find_iter(window.as_slice()) {
                if mat.start() >= settled {
                    break;
                }
                let label = &self.replacements[mat.pattern().as_usize()];
                output.write_all(&window[consumed..mat.start()])?;
                output.write_all(label)?;
                // usize -> u64 is lossless on every supported target.
                written += (mat.start() - consumed + label.len()) as u64;
                consumed = mat.end();
            }

            // Literal bytes up to the settled boundary cannot belong to any future match.
            if consumed < settled {
                output.write_all(&window[consumed..settled])?;
                written += (settled - consumed) as u64;
                consumed = settled;
            }

            window.drain(..consumed);

            if eof {
                return Ok(written);
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Convenience constructor: a redactor that knows exactly one secret.
    fn single(key: &str, value: &[u8]) -> Redactor {
        let secrets = [(key, value)];
        Redactor::new(&secrets).expect("Redactor::new should not fail")
    }

    // ---- raw matching -------------------------------------------------------------------

    #[test]
    fn single_secret_raw_redacted() {
        let redactor = single("DB_PASS", b"hunter2");
        assert_eq!(
            redactor.redact_str("The password is hunter2, keep it safe."),
            "The password is [REDACTED_DB_PASS], keep it safe."
        );
    }

    #[test]
    fn multiple_secrets_all_redacted() {
        let secrets: &[(&str, &[u8])] = &[("API_KEY", b"abc123"), ("DB_PASS", b"hunter2")];
        let redactor = Redactor::new(secrets).unwrap();
        assert_eq!(
            redactor.redact_str("key=abc123 pass=hunter2 end"),
            "key=[REDACTED_API_KEY] pass=[REDACTED_DB_PASS] end"
        );
    }

    #[test]
    fn no_match_returns_input_unchanged() {
        let redactor = single("TOKEN", b"secret_token");
        let text = "nothing sensitive here";
        assert_eq!(redactor.redact_str(text), text);
    }

    // ---- encoded forms ------------------------------------------------------------------

    #[test]
    fn base64_encoded_secret_redacted() {
        let secret = b"hunter2";
        let redactor = single("DB_PASS", secret);
        let encoded = general_purpose::STANDARD.encode(secret);
        let redacted = redactor.redact_str(&format!("encoded={encoded}"));
        assert_eq!(redacted, "encoded=[REDACTED_DB_PASS_B64]");
    }

    #[test]
    fn base64url_encoded_secret_redacted() {
        // These bytes encode differently in the standard and URL-safe alphabets.
        let secret = b"\xfb\xff";
        let redactor = single("BIN", secret);
        let encoded = general_purpose::URL_SAFE.encode(secret);
        let redacted = redactor.redact_str(&format!("data={encoded}"));
        assert_eq!(redacted, "data=[REDACTED_BIN_B64URL]");
    }

    #[test]
    fn url_encoded_secret_redacted() {
        let secret = b"p@ss w0rd!";
        let redactor = single("PASS", secret);
        let encoded = percent_encode(secret, NON_ALPHANUMERIC).to_string();
        let redacted = redactor.redact_str(&format!("password={encoded}"));
        assert_eq!(redacted, "password=[REDACTED_PASS_URL]");
    }

    #[test]
    fn hex_lower_encoded_secret_redacted() {
        let secret = b"deadbeef_raw";
        let redactor = single("KEY", secret);
        let encoded = hex::encode(secret);
        let redacted = redactor.redact_str(&format!("hex={encoded}"));
        assert_eq!(redacted, "hex=[REDACTED_KEY_HEX]");
    }

    #[test]
    fn hex_upper_encoded_secret_redacted() {
        let secret = b"deadbeef_raw";
        let redactor = single("KEY", secret);
        let encoded = hex::encode_upper(secret);
        let redacted = redactor.redact_str(&format!("hex={encoded}"));
        assert_eq!(redacted, "hex=[REDACTED_KEY_HEX]");
    }

    // ---- empty / missing secrets --------------------------------------------------------

    #[test]
    fn empty_secret_value_is_skipped() {
        let secrets: &[(&str, &[u8])] = &[("EMPTY", b""), ("REAL", b"secret")];
        let redactor = Redactor::new(secrets).unwrap();
        assert!(redactor.has_patterns(), "REAL should still produce patterns");
        assert_eq!(redactor.redact_str("value=secret"), "value=[REDACTED_REAL]");
    }

    #[test]
    fn all_empty_secrets_yields_no_patterns() {
        let secrets: &[(&str, &[u8])] = &[("A", b""), ("B", b"")];
        let redactor = Redactor::new(secrets).unwrap();
        assert!(!redactor.has_patterns());
    }

    #[test]
    fn no_secrets_yields_no_patterns() {
        let redactor = Redactor::new(&[]).unwrap();
        assert!(!redactor.has_patterns());
        let text = "nothing to redact";
        assert_eq!(redactor.redact_str(text), text);
    }

    // ---- positions and overlaps ---------------------------------------------------------

    #[test]
    fn overlapping_secrets_leftmost_wins() {
        let secrets: &[(&str, &[u8])] = &[("LONG", b"abcdef"), ("SHORT", b"abc")];
        let redactor = Redactor::new(secrets).unwrap();
        assert_eq!(redactor.redact_str("abcdef"), "[REDACTED_LONG]");
    }

    #[test]
    fn adjacent_secrets_both_redacted() {
        let secrets: &[(&str, &[u8])] = &[("A", b"foo"), ("B", b"bar")];
        let redactor = Redactor::new(secrets).unwrap();
        assert_eq!(redactor.redact_str("foobar"), "[REDACTED_A][REDACTED_B]");
    }

    #[test]
    fn secret_at_start_of_input() {
        let redactor = single("S", b"secret");
        let redacted = redactor.redact_str("secret is here");
        assert_eq!(redacted, "[REDACTED_S] is here");
    }

    #[test]
    fn secret_at_end_of_input() {
        let redactor = single("S", b"secret");
        let redacted = redactor.redact_str("the value is secret");
        assert_eq!(redacted, "the value is [REDACTED_S]");
    }

    #[test]
    fn multiple_occurrences_all_redacted() {
        let redactor = single("K", b"tok");
        assert_eq!(
            redactor.redact_str("tok tok tok"),
            "[REDACTED_K] [REDACTED_K] [REDACTED_K]"
        );
    }

    #[test]
    fn encoded_equal_to_raw_not_duplicated() {
        let secrets: &[(&str, &[u8])] = &[("X", b"hello")];
        let redactor = Redactor::new(secrets).unwrap();
        assert!(redactor.has_patterns());
        let encoded = general_purpose::STANDARD.encode(b"hello");
        let text = format!("raw=hello b64={encoded}");
        assert_eq!(
            redactor.redact_str(&text),
            "raw=[REDACTED_X] b64=[REDACTED_X_B64]"
        );
    }

    // ---- label formats ------------------------------------------------------------------

    #[test]
    fn label_format_raw() {
        let redactor = single("MY_SECRET", b"val");
        let redacted = redactor.redact_str("val");
        assert_eq!(redacted, "[REDACTED_MY_SECRET]");
    }

    #[test]
    fn label_format_b64() {
        let secret = b"val";
        let redactor = single("MY_SECRET", secret);
        let encoded = general_purpose::STANDARD.encode(secret);
        assert_eq!(redactor.redact_str(&encoded), "[REDACTED_MY_SECRET_B64]");
    }

    #[test]
    fn label_format_b64url() {
        let secret = b"\xfb\xff\xfe";
        let redactor = single("MY_SECRET", secret);
        let encoded = general_purpose::URL_SAFE.encode(secret);
        assert_eq!(redactor.redact_str(&encoded), "[REDACTED_MY_SECRET_B64URL]");
    }

    #[test]
    fn label_format_url() {
        let secret = b"a b";
        let redactor = single("MY_SECRET", secret);
        let encoded = percent_encode(secret, NON_ALPHANUMERIC).to_string();
        assert_eq!(redactor.redact_str(&encoded), "[REDACTED_MY_SECRET_URL]");
    }

    #[test]
    fn label_format_hex_lower() {
        let secret = b"abc";
        let redactor = single("MY_SECRET", secret);
        let encoded = hex::encode(secret);
        assert_eq!(redactor.redact_str(&encoded), "[REDACTED_MY_SECRET_HEX]");
    }

    #[test]
    fn label_format_hex_upper() {
        let secret = b"abc";
        let redactor = single("MY_SECRET", secret);
        let encoded = hex::encode_upper(secret);
        assert_eq!(redactor.redact_str(&encoded), "[REDACTED_MY_SECRET_HEX]");
    }

    // ---- byte and stream APIs -----------------------------------------------------------

    #[test]
    fn redact_bytes_works_on_binary_data() {
        let secret: &[u8] = &[0xDE, 0xAD, 0xBE, 0xEF];
        let redactor = single("BIN", secret);
        let input = [b"prefix ".as_slice(), secret, b" suffix".as_slice()].concat();
        let redacted = redactor.redact_bytes(&input);
        assert_eq!(redacted, b"prefix [REDACTED_BIN] suffix");
    }

    #[test]
    fn redact_stream_basic() {
        let redactor = single("TOKEN", b"s3cr3t");
        let mut source = io::Cursor::new(b"Authorization: Bearer s3cr3t\n");
        let mut sink = Vec::new();
        let written = redactor.redact_stream(&mut source, &mut sink).unwrap();
        let expected = b"Authorization: Bearer [REDACTED_TOKEN]\n";
        assert_eq!(sink, expected);
        assert_eq!(written, expected.len() as u64);
    }

    #[test]
    fn redact_stream_no_match_passes_through() {
        let redactor = single("TOKEN", b"s3cr3t");
        let payload = b"nothing sensitive here\n";
        let mut source = io::Cursor::new(payload);
        let mut sink = Vec::new();
        let written = redactor.redact_stream(&mut source, &mut sink).unwrap();
        assert_eq!(sink, payload);
        assert_eq!(written, payload.len() as u64);
    }

    #[test]
    fn redact_stream_empty_input() {
        let redactor = single("TOKEN", b"s3cr3t");
        let mut source = io::Cursor::new(b"");
        let mut sink = Vec::new();
        let written = redactor.redact_stream(&mut source, &mut sink).unwrap();
        assert_eq!(sink, b"");
        assert_eq!(written, 0);
    }

    #[test]
    fn redact_stream_multiple_secrets() {
        let secrets: &[(&str, &[u8])] = &[("A", b"alpha"), ("B", b"beta")];
        let redactor = Redactor::new(secrets).unwrap();
        let mut source = io::Cursor::new(b"alpha and beta are both secrets");
        let mut sink = Vec::new();
        redactor.redact_stream(&mut source, &mut sink).unwrap();
        assert_eq!(sink, b"[REDACTED_A] and [REDACTED_B] are both secrets");
    }

    // ---- has_patterns -------------------------------------------------------------------

    #[test]
    fn has_patterns_true_when_secrets_present() {
        assert!(single("K", b"v").has_patterns());
    }

    #[test]
    fn has_patterns_false_when_no_secrets() {
        let redactor = Redactor::new(&[]).unwrap();
        assert!(!redactor.has_patterns());
    }

    #[test]
    fn has_patterns_false_when_only_empty_secrets() {
        let secrets: &[(&str, &[u8])] = &[("K", b"")];
        let redactor = Redactor::new(secrets).unwrap();
        assert!(!redactor.has_patterns());
    }
}