use crate::category::Category;
use crate::error::{Result, SanitizeError};
use crate::scanner::ScanPattern;
/// Outcome of compiling secret entries into scan patterns: the patterns that
/// compiled successfully, plus `(entry index, error)` pairs for the entries
/// that failed to compile.
pub type PatternCompileResult = (Vec<ScanPattern>, Vec<(usize, SanitizeError)>);
use aes_gcm::aead::{Aead, KeyInit};
use aes_gcm::{Aes256Gcm, Nonce};
use hmac::Hmac;
use rand::RngCore;
use serde::{Deserialize, Serialize};
use sha2::Sha256;
use zeroize::{Zeroize, Zeroizing};
/// Length in bytes of the random salt stored at the front of an encrypted blob.
const SALT_LEN: usize = 32;
/// Length in bytes of the nonce stored directly after the salt.
const NONCE_LEN: usize = 12;
/// Iteration count handed to PBKDF2 when deriving the key from the password.
const PBKDF2_ITERATIONS: u32 = 600_000;
/// Smallest blob accepted for decryption: salt, nonce, plus 16 more bytes
/// (presumably the AES-GCM authentication tag; confirm against the `aes_gcm` docs).
const MIN_ENCRYPTED_LEN: usize = SALT_LEN + NONCE_LEN + 16;
/// One entry of a secrets file, describing a single value to scan for.
///
/// All string fields are zeroized when the entry is dropped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretEntry {
/// The text to match; compiled as a regex or a literal depending on `kind`.
pub pattern: String,
/// `"regex"` compiles `pattern` as a regex; any other value is treated as a
/// literal. Defaults to `"literal"` when absent.
#[serde(default = "default_kind")]
pub kind: String,
/// Category name, resolved through `parse_category`. Defaults to
/// `"custom:secret"` when absent.
#[serde(default = "default_category")]
pub category: String,
/// Optional label for the compiled pattern; when absent, a truncated copy of
/// `pattern` is used instead.
#[serde(default)]
pub label: Option<String>,
}
impl Drop for SecretEntry {
fn drop(&mut self) {
self.pattern.zeroize();
self.kind.zeroize();
self.category.zeroize();
if let Some(ref mut l) = self.label {
l.zeroize();
}
}
}
/// Serde default for `SecretEntry::kind`: entries are literals unless stated otherwise.
fn default_kind() -> String {
    String::from("literal")
}
/// Serde default for `SecretEntry::category`.
fn default_category() -> String {
    String::from("custom:secret")
}
/// Serialization format of a secrets file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecretsFormat {
    /// JSON document (an array of entries).
    Json,
    /// YAML document (a sequence of entries).
    Yaml,
    /// TOML document (entries listed under a `secrets` key).
    Toml,
}

impl SecretsFormat {
    /// Infers the format from a file name.
    ///
    /// A trailing `.enc` wrapper extension is ignored, so `secrets.json.enc`
    /// resolves to [`SecretsFormat::Json`]. Both the wrapper and the format
    /// extension are matched ASCII-case-insensitively. Returns `None` when the
    /// remaining extension is missing or not one of `json`, `yaml`, `yml`, `toml`.
    pub fn from_extension(path: &str) -> Option<Self> {
        const WRAPPER: &str = ".enc";

        // Strip the wrapper with the same case-insensitivity as the format
        // extension below. `str::get` yields `None` if the cut would land inside
        // a multi-byte character, in which case the tail cannot be ".enc".
        let base = path
            .len()
            .checked_sub(WRAPPER.len())
            .filter(|&cut| {
                path.get(cut..)
                    .is_some_and(|tail| tail.eq_ignore_ascii_case(WRAPPER))
            })
            .map_or(path, |cut| &path[..cut]);

        let ext = std::path::Path::new(base).extension()?;
        let is = |name: &str| ext.eq_ignore_ascii_case(name);

        if is("json") {
            Some(Self::Json)
        } else if is("yaml") || is("yml") {
            Some(Self::Yaml)
        } else if is("toml") {
            Some(Self::Toml)
        } else {
            None
        }
    }

    /// Guesses the format from the first non-whitespace characters of `content`.
    ///
    /// * `{` → JSON.
    /// * `[[` followed by a bare-key start (ASCII letter or `_`) → TOML, because
    ///   that is an array-of-tables header such as `[[secrets]]`. A JSON secrets
    ///   array never begins this way: its first element is an object (`[{`) or,
    ///   in sequence form, a quoted string (`[["`).
    /// * any other `[` → JSON.
    /// * `-` (which also covers the `---` document marker) → YAML.
    /// * anything else → TOML.
    ///
    /// Invalid UTF-8 is decoded lossily; this function never fails.
    pub fn detect(content: &[u8]) -> Self {
        let text = String::from_utf8_lossy(content);
        let trimmed = text.trim_start();

        if trimmed.starts_with('{') {
            return Self::Json;
        }

        if let Some(after_bracket) = trimmed.strip_prefix('[') {
            let is_toml_table_array = after_bracket
                .strip_prefix('[')
                .and_then(|inner| inner.trim_start().chars().next())
                .is_some_and(|c| c.is_ascii_alphabetic() || c == '_');
            return if is_toml_table_array {
                Self::Toml
            } else {
                Self::Json
            };
        }

        if trimmed.starts_with('-') {
            Self::Yaml
        } else {
            Self::Toml
        }
    }
}
/// Deserialization target for the TOML form of a secrets file, where the
/// entries are listed under a top-level `secrets` key.
#[derive(Deserialize)]
struct TomlSecrets {
secrets: Vec<SecretEntry>,
}
/// Borrowing counterpart of `TomlSecrets` used when serializing, so the
/// entries can be written under a `secrets` key without cloning them.
#[derive(Serialize)]
struct TomlSecretsRef<'a> {
secrets: &'a [SecretEntry],
}
/// Derives a 32-byte key from `password` and `salt` using PBKDF2 over
/// HMAC-SHA256 with `PBKDF2_ITERATIONS` rounds.
///
/// The key is returned inside `Zeroizing`.
///
/// # Panics
///
/// Panics if the PBKDF2 call reports an error.
fn derive_key(password: &[u8], salt: &[u8]) -> Zeroizing<[u8; 32]> {
    let mut derived = Zeroizing::new([0u8; 32]);
    let outcome =
        pbkdf2::pbkdf2::<Hmac<Sha256>>(password, salt, PBKDF2_ITERATIONS, derived.as_mut());
    outcome.expect("PBKDF2 output length is valid");
    derived
}
/// Encrypts `plaintext` with AES-256-GCM under a key derived from `password`.
///
/// A fresh random salt and nonce are generated on every call. The returned
/// blob is laid out as `salt || nonce || ciphertext`, where `ciphertext` is
/// whatever the AEAD cipher produced.
///
/// # Errors
///
/// * `SanitizeError::SecretsEmptyPassword` if `password` is empty.
/// * `SanitizeError::SecretsCipherError` if the cipher cannot be initialised
///   or encryption fails.
pub fn encrypt_secrets(plaintext: &[u8], password: &str) -> Result<Vec<u8>> {
    if password.is_empty() {
        return Err(SanitizeError::SecretsEmptyPassword);
    }

    // Salt is drawn before the nonce, from the same thread-local generator.
    let mut salt = [0u8; SALT_LEN];
    let mut nonce_bytes = [0u8; NONCE_LEN];
    {
        let mut rng = rand::thread_rng();
        rng.fill_bytes(&mut salt);
        rng.fill_bytes(&mut nonce_bytes);
    }

    let key = derive_key(password.as_bytes(), &salt);
    let cipher = match Aes256Gcm::new_from_slice(key.as_ref()) {
        Ok(cipher) => cipher,
        Err(e) => {
            return Err(SanitizeError::SecretsCipherError(format!(
                "cipher init: {}",
                e
            )))
        }
    };

    let sealed = match cipher.encrypt(Nonce::from_slice(&nonce_bytes), plaintext) {
        Ok(bytes) => bytes,
        Err(e) => {
            return Err(SanitizeError::SecretsCipherError(format!(
                "encryption: {}",
                e
            )))
        }
    };

    let mut blob = Vec::with_capacity(SALT_LEN + NONCE_LEN + sealed.len());
    blob.extend_from_slice(&salt);
    blob.extend_from_slice(&nonce_bytes);
    blob.extend_from_slice(&sealed);
    Ok(blob)
}
/// Decrypts a blob laid out as `salt || nonce || ciphertext`.
///
/// The key is re-derived from `password` and the salt stored in the blob. The
/// recovered plaintext is returned inside `Zeroizing`.
///
/// # Errors
///
/// * `SanitizeError::SecretsTooShort` if the blob is shorter than
///   `MIN_ENCRYPTED_LEN`.
/// * `SanitizeError::SecretsCipherError` if the cipher cannot be initialised.
/// * `SanitizeError::SecretsDecryptFailed` if the cipher rejects the input;
///   the underlying error is deliberately discarded.
pub fn decrypt_secrets(encrypted: &[u8], password: &str) -> Result<Zeroizing<Vec<u8>>> {
    if encrypted.len() < MIN_ENCRYPTED_LEN {
        return Err(SanitizeError::SecretsTooShort);
    }

    // The length check above guarantees both splits are in bounds.
    let (salt, remainder) = encrypted.split_at(SALT_LEN);
    let (nonce_bytes, ciphertext) = remainder.split_at(NONCE_LEN);

    let key = derive_key(password.as_bytes(), salt);
    let cipher = Aes256Gcm::new_from_slice(key.as_ref())
        .map_err(|e| SanitizeError::SecretsCipherError(format!("cipher init: {}", e)))?;

    cipher
        .decrypt(Nonce::from_slice(nonce_bytes), ciphertext)
        .map(Zeroizing::new)
        .map_err(|_| SanitizeError::SecretsDecryptFailed)
}
/// Parses a plaintext secrets document into its entries.
///
/// When `format` is `None` the format is guessed with
/// `SecretsFormat::detect`. JSON and YAML documents are read directly as a
/// list of entries; TOML documents are read through `TomlSecrets`, whose
/// `secrets` field is returned.
///
/// # Errors
///
/// * `SanitizeError::SecretsInvalidUtf8` if `plaintext` is not valid UTF-8.
/// * `SanitizeError::SecretsFormatError` (tagged `"JSON"`, `"YAML"` or
///   `"TOML"`) if the document fails to deserialize.
pub fn parse_secrets(plaintext: &[u8], format: Option<SecretsFormat>) -> Result<Vec<SecretEntry>> {
    let chosen = match format {
        Some(explicit) => explicit,
        None => SecretsFormat::detect(plaintext),
    };
    let text = std::str::from_utf8(plaintext)
        .map_err(|e| SanitizeError::SecretsInvalidUtf8(e.to_string()))?;

    // Normalise each parser's error to its message so one place builds the
    // `SecretsFormatError`.
    let (tag, outcome): (&'static str, std::result::Result<Vec<SecretEntry>, String>) =
        match chosen {
            SecretsFormat::Json => (
                "JSON",
                serde_json::from_str(text).map_err(|e| e.to_string()),
            ),
            SecretsFormat::Yaml => (
                "YAML",
                serde_yaml_ng::from_str(text).map_err(|e| e.to_string()),
            ),
            SecretsFormat::Toml => (
                "TOML",
                toml::from_str::<TomlSecrets>(text)
                    .map(|wrapper| wrapper.secrets)
                    .map_err(|e| e.to_string()),
            ),
        };

    outcome.map_err(|message| SanitizeError::SecretsFormatError {
        format: tag.into(),
        message,
    })
}
/// Serializes `entries` into the requested format and returns the bytes.
///
/// JSON is pretty-printed. TOML output wraps the entries in a `secrets` key
/// via `TomlSecretsRef` (also pretty-printed).
///
/// # Errors
///
/// Returns `SanitizeError::SecretsFormatError` tagged `"JSON-serialize"`,
/// `"YAML-serialize"` or `"TOML-serialize"` if the serializer fails.
pub fn serialize_secrets(entries: &[SecretEntry], format: SecretsFormat) -> Result<Vec<u8>> {
    // Normalise each serializer's error to its message so one place builds the
    // `SecretsFormatError`.
    let (tag, outcome): (&'static str, std::result::Result<Vec<u8>, String>) = match format {
        SecretsFormat::Json => (
            "JSON-serialize",
            serde_json::to_vec_pretty(entries).map_err(|e| e.to_string()),
        ),
        SecretsFormat::Yaml => (
            "YAML-serialize",
            serde_yaml_ng::to_string(entries)
                .map(String::into_bytes)
                .map_err(|e| e.to_string()),
        ),
        SecretsFormat::Toml => (
            "TOML-serialize",
            toml::to_string_pretty(&TomlSecretsRef { secrets: entries })
                .map(String::into_bytes)
                .map_err(|e| e.to_string()),
        ),
    };

    outcome.map_err(|message| SanitizeError::SecretsFormatError {
        format: tag.into(),
        message,
    })
}
/// Maps a category name from a secrets file onto a `Category`.
///
/// The built-in names listed below map to their dedicated variants. Every
/// other name becomes `Category::Custom`, with a leading `custom:` prefix
/// removed if present (so `custom:api_key` and `api_key` both yield the tag
/// `api_key`). Matching is exact and case-sensitive.
pub fn parse_category(s: &str) -> Category {
    // No built-in name starts with "custom:", so the prefixed form can be
    // handled up front.
    if let Some(tag) = s.strip_prefix("custom:") {
        return Category::Custom(tag.into());
    }

    match s {
        // Personal data
        "email" => Category::Email,
        "name" => Category::Name,
        "phone" => Category::Phone,
        "credit_card" => Category::CreditCard,
        "ssn" => Category::Ssn,
        // Network identifiers
        "ipv4" => Category::IpV4,
        "ipv6" => Category::IpV6,
        "hostname" => Category::Hostname,
        "mac_address" => Category::MacAddress,
        "url" => Category::Url,
        // Machine / platform identifiers
        "container_id" => Category::ContainerId,
        "uuid" => Category::Uuid,
        "file_path" => Category::FilePath,
        "windows_sid" => Category::WindowsSid,
        "aws_arn" => Category::AwsArn,
        "azure_resource_id" => Category::AzureResourceId,
        // Credentials
        "jwt" => Category::Jwt,
        "auth_token" => Category::AuthToken,
        // Anything unrecognised is kept verbatim as a custom tag.
        unknown => Category::Custom(unknown.into()),
    }
}
fn zeroize_and_drop_entries(mut entries: Vec<SecretEntry>) {
for entry in &mut entries {
entry.pattern.zeroize();
entry.kind.zeroize();
entry.category.zeroize();
if let Some(ref mut l) = entry.label {
l.zeroize();
}
}
}
/// Compiles each entry into a `ScanPattern`.
///
/// Entries whose `kind` is exactly `"regex"` are compiled with
/// `ScanPattern::from_regex`; every other kind is compiled with
/// `ScanPattern::from_literal`. An entry without a label gets a truncated
/// copy of its pattern as the label.
///
/// Compilation does not stop at the first failure: the result holds all
/// patterns that compiled, plus `(entry index, error)` for each that did not.
pub fn entries_to_patterns(entries: &[SecretEntry]) -> PatternCompileResult {
    let mut compiled = Vec::with_capacity(entries.len());
    let mut failures = Vec::new();

    for (index, secret) in entries.iter().enumerate() {
        let category = parse_category(&secret.category);
        let label = match &secret.label {
            Some(explicit) => explicit.clone(),
            None => truncate_label(&secret.pattern),
        };

        let attempt = if secret.kind == "regex" {
            ScanPattern::from_regex(&secret.pattern, category, label)
        } else {
            ScanPattern::from_literal(&secret.pattern, category, label)
        };

        match attempt {
            Ok(pattern) => compiled.push(pattern),
            Err(err) => failures.push((index, err)),
        }
    }

    (compiled, failures)
}
/// Produces a label of bounded size from `s`.
///
/// Strings of at most 32 bytes are returned unchanged. Longer strings are cut
/// to at most 31 bytes and suffixed with `…`. The cut is moved back to the
/// nearest UTF-8 character boundary, so a multi-byte character straddling
/// byte 31 is dropped whole instead of causing a slicing panic.
fn truncate_label(s: &str) -> String {
    const MAX_LEN: usize = 32;

    if s.len() <= MAX_LEN {
        return s.to_string();
    }

    // Index 0 is always a boundary, so this loop terminates.
    let mut cut = MAX_LEN - 1;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}…", &s[..cut])
}
pub fn load_encrypted_secrets(
encrypted_bytes: &[u8],
password: &str,
format: Option<SecretsFormat>,
) -> Result<PatternCompileResult> {
let plaintext = decrypt_secrets(encrypted_bytes, password)?;
let entries = parse_secrets(&plaintext, format)?;
let result = entries_to_patterns(&entries);
zeroize_and_drop_entries(entries);
Ok(result)
}
/// Parses a plaintext secrets document and compiles its entries into patterns.
///
/// The parsed entries are zeroized once the patterns have been built.
///
/// # Errors
///
/// Propagates any error from `parse_secrets`. Per-entry compile failures are
/// reported inside the returned `PatternCompileResult`, not as an `Err`.
pub fn load_plaintext_secrets(
    plaintext: &[u8],
    format: Option<SecretsFormat>,
) -> Result<PatternCompileResult> {
    parse_secrets(plaintext, format).map(|entries| {
        let compiled = entries_to_patterns(&entries);
        zeroize_and_drop_entries(entries);
        compiled
    })
}
/// Heuristically decides whether `data` is an encrypted blob rather than a
/// plaintext secrets document.
///
/// Returns `false` when the data is shorter than `MIN_ENCRYPTED_LEN`, or when
/// it is valid UTF-8 whose first non-whitespace character is one of `[`, `{`,
/// `-` or `#`. Everything else, including valid UTF-8 without such a marker,
/// is reported as encrypted.
pub fn looks_encrypted(data: &[u8]) -> bool {
    if data.len() < MIN_ENCRYPTED_LEN {
        return false;
    }

    match std::str::from_utf8(data) {
        Ok(text) => {
            let first = text.trim_start().chars().next();
            !matches!(first, Some('[' | '{' | '-' | '#'))
        }
        // Bytes that are not UTF-8 cannot be a text document.
        Err(_) => true,
    }
}
/// Loads secrets from `data`, deciding automatically whether to decrypt first.
///
/// The data is treated as plaintext when `force_plaintext` is set or when
/// `looks_encrypted` returns `false`; otherwise it is decrypted with
/// `password`. The boolean in the result is `true` when decryption was used.
///
/// # Errors
///
/// * `SanitizeError::SecretsPasswordRequired` if the data is treated as
///   encrypted but `password` is `None`.
/// * Any error propagated from the plaintext or encrypted loader.
pub fn load_secrets_auto(
    data: &[u8],
    password: Option<&str>,
    format: Option<SecretsFormat>,
    force_plaintext: bool,
) -> Result<(PatternCompileResult, bool)> {
    let treat_as_plaintext = force_plaintext || !looks_encrypted(data);
    if treat_as_plaintext {
        return load_plaintext_secrets(data, format).map(|compiled| (compiled, false));
    }

    let Some(pw) = password else {
        return Err(SanitizeError::SecretsPasswordRequired);
    };
    load_encrypted_secrets(data, pw, format).map(|compiled| (compiled, true))
}
// Unit tests for the secrets module: format parsing and detection, category
// mapping, pattern compilation, the encrypt/decrypt round trip, and the
// auto-loading entry point.
#[cfg(test)]
mod tests {
use super::*;
// Fixture: two entries (one regex, one literal) expressed as JSON.
fn sample_json() -> &'static str {
r#"[
{
"pattern": "alice@corp\\.com",
"kind": "regex",
"category": "email",
"label": "alice_email"
},
{
"pattern": "sk-proj-abc123secret",
"kind": "literal",
"category": "custom:api_key",
"label": "openai_key"
}
]"#
}
// Fixture: the same two entries expressed as YAML.
fn sample_yaml() -> &'static str {
r#"- pattern: "alice@corp\\.com"
kind: regex
category: email
label: alice_email
- pattern: sk-proj-abc123secret
kind: literal
category: "custom:api_key"
label: openai_key
"#
}
// Fixture: the same two entries expressed as TOML under `[[secrets]]`.
fn sample_toml() -> &'static str {
r#"[[secrets]]
pattern = "alice@corp\\.com"
kind = "regex"
category = "email"
label = "alice_email"
[[secrets]]
pattern = "sk-proj-abc123secret"
kind = "literal"
category = "custom:api_key"
label = "openai_key"
"#
}
// --- Parsing with an explicit format ---
#[test]
fn parse_json_entries() {
let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].kind, "regex");
assert_eq!(entries[0].category, "email");
assert_eq!(entries[1].kind, "literal");
}
#[test]
fn parse_yaml_entries() {
let entries = parse_secrets(sample_yaml().as_bytes(), Some(SecretsFormat::Yaml)).unwrap();
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].label, Some("alice_email".into()));
}
#[test]
fn parse_toml_entries() {
let entries = parse_secrets(sample_toml().as_bytes(), Some(SecretsFormat::Toml)).unwrap();
assert_eq!(entries.len(), 2);
assert_eq!(entries[1].pattern, "sk-proj-abc123secret");
}
// --- Parsing with format auto-detection (format = None) ---
#[test]
fn parse_auto_detect_json() {
let entries = parse_secrets(sample_json().as_bytes(), None).unwrap();
assert_eq!(entries.len(), 2);
}
#[test]
fn parse_auto_detect_yaml() {
let entries = parse_secrets(sample_yaml().as_bytes(), None).unwrap();
assert_eq!(entries.len(), 2);
}
// --- Category name mapping ---
#[test]
fn parse_builtin_categories() {
assert_eq!(parse_category("email"), Category::Email);
assert_eq!(parse_category("ipv4"), Category::IpV4);
assert_eq!(parse_category("ssn"), Category::Ssn);
}
// The `custom:` prefix is stripped from the tag.
#[test]
fn parse_custom_category() {
match parse_category("custom:api_key") {
Category::Custom(tag) => assert_eq!(tag.as_str(), "api_key"),
other => panic!("expected Custom, got {:?}", other),
}
}
// Names that are not built in fall back to a custom tag, kept verbatim.
#[test]
fn parse_unknown_category_becomes_custom() {
match parse_category("foobar") {
Category::Custom(tag) => assert_eq!(tag.as_str(), "foobar"),
other => panic!("expected Custom, got {:?}", other),
}
}
// --- Pattern compilation ---
#[test]
fn entries_to_patterns_success() {
let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
let (patterns, errors) = entries_to_patterns(&entries);
assert_eq!(patterns.len(), 2);
assert!(errors.is_empty());
}
// A regex that fails to compile is reported with the index of its entry.
#[test]
fn entries_to_patterns_bad_regex() {
let json = r#"[{"pattern": "[invalid(", "kind": "regex", "category": "email"}]"#;
let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
let (patterns, errors) = entries_to_patterns(&entries);
assert!(patterns.is_empty());
assert_eq!(errors.len(), 1);
assert_eq!(errors[0].0, 0);
}
// --- Encryption and decryption ---
#[test]
fn encrypt_decrypt_roundtrip() {
let plaintext = sample_json().as_bytes();
let password = "test-password-42";
let encrypted = encrypt_secrets(plaintext, password).unwrap();
assert!(encrypted.len() > plaintext.len());
let decrypted = decrypt_secrets(&encrypted, password).unwrap();
assert_eq!(decrypted.as_slice(), plaintext);
}
#[test]
fn decrypt_wrong_password_fails() {
let plaintext = b"hello";
let encrypted = encrypt_secrets(plaintext, "correct").unwrap();
let result = decrypt_secrets(&encrypted, "wrong");
assert!(result.is_err());
}
// Blobs shorter than the minimum length are rejected.
#[test]
fn decrypt_truncated_blob_fails() {
let result = decrypt_secrets(&[0u8; 10], "any");
assert!(result.is_err());
}
// Flipping the last byte of the blob must make decryption fail.
#[test]
fn decrypt_tampered_blob_fails() {
let plaintext = b"hello world";
let mut encrypted = encrypt_secrets(plaintext, "pw").unwrap();
let last = encrypted.len() - 1;
encrypted[last] ^= 0xFF;
let result = decrypt_secrets(&encrypted, "pw");
assert!(result.is_err());
}
#[test]
fn encrypt_empty_password_rejected() {
let result = encrypt_secrets(b"hello", "");
assert!(result.is_err());
}
// --- End-to-end: encrypt, then load and compile, per format ---
#[test]
fn full_pipeline_json() {
let plaintext = sample_json().as_bytes();
let password = "pipeline-test";
let encrypted = encrypt_secrets(plaintext, password).unwrap();
let (patterns, errors) =
load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Json)).unwrap();
assert_eq!(patterns.len(), 2);
assert!(errors.is_empty());
assert_eq!(patterns[0].label(), "alice_email");
assert_eq!(patterns[1].label(), "openai_key");
}
#[test]
fn full_pipeline_yaml() {
let plaintext = sample_yaml().as_bytes();
let password = "yaml-test";
let encrypted = encrypt_secrets(plaintext, password).unwrap();
let (patterns, errors) =
load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Yaml)).unwrap();
assert_eq!(patterns.len(), 2);
assert!(errors.is_empty());
}
#[test]
fn full_pipeline_toml() {
let plaintext = sample_toml().as_bytes();
let password = "toml-test";
let encrypted = encrypt_secrets(plaintext, password).unwrap();
let (patterns, errors) =
load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Toml)).unwrap();
assert_eq!(patterns.len(), 2);
assert!(errors.is_empty());
}
#[test]
fn load_plaintext_secrets_works() {
let (patterns, errors) =
load_plaintext_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
assert_eq!(patterns.len(), 2);
assert!(errors.is_empty());
}
// --- Serialization ---
#[test]
fn serialize_roundtrip_json() {
let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
let serialized = serialize_secrets(&entries, SecretsFormat::Json).unwrap();
let reparsed = parse_secrets(&serialized, Some(SecretsFormat::Json)).unwrap();
assert_eq!(entries.len(), reparsed.len());
assert_eq!(entries[0].pattern, reparsed[0].pattern);
}
// --- Format inference from file names (a trailing `.enc` is ignored) ---
#[test]
fn format_from_extension() {
assert_eq!(
SecretsFormat::from_extension("secrets.json"),
Some(SecretsFormat::Json)
);
assert_eq!(
SecretsFormat::from_extension("secrets.json.enc"),
Some(SecretsFormat::Json)
);
assert_eq!(
SecretsFormat::from_extension("secrets.yaml"),
Some(SecretsFormat::Yaml)
);
assert_eq!(
SecretsFormat::from_extension("secrets.yml.enc"),
Some(SecretsFormat::Yaml)
);
assert_eq!(
SecretsFormat::from_extension("secrets.toml"),
Some(SecretsFormat::Toml)
);
assert_eq!(SecretsFormat::from_extension("secrets.txt"), None);
}
// --- Defaults applied to omitted entry fields ---
#[test]
fn default_kind_is_literal() {
let json = r#"[{"pattern": "foo"}]"#;
let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
assert_eq!(entries[0].kind, "literal");
}
#[test]
fn default_category_is_custom_secret() {
let json = r#"[{"pattern": "foo"}]"#;
let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
assert_eq!(entries[0].category, "custom:secret");
}
// Without an explicit label, a short pattern is used as the label as is.
#[test]
fn default_label_from_pattern() {
let json = r#"[{"pattern": "short"}]"#;
let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
let (patterns, _) = entries_to_patterns(&entries);
assert_eq!(patterns[0].label(), "short");
}
// --- Encrypted-vs-plaintext heuristic ---
#[test]
fn looks_encrypted_json_plaintext() {
assert!(!looks_encrypted(sample_json().as_bytes()));
}
#[test]
fn looks_encrypted_yaml_plaintext() {
assert!(!looks_encrypted(sample_yaml().as_bytes()));
}
#[test]
fn looks_encrypted_toml_plaintext() {
assert!(!looks_encrypted(sample_toml().as_bytes()));
}
#[test]
fn looks_encrypted_actual_encrypted() {
let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
assert!(looks_encrypted(&encrypted));
}
#[test]
fn looks_encrypted_too_short() {
assert!(!looks_encrypted(&[0u8; 10]));
}
// --- Auto-loading entry point ---
#[test]
fn auto_load_plaintext_json() {
let data = sample_json().as_bytes();
let ((pats, errs), was_enc) =
load_secrets_auto(data, None, Some(SecretsFormat::Json), false).unwrap();
assert!(!was_enc);
assert_eq!(pats.len(), 2);
assert!(errs.is_empty());
}
#[test]
fn auto_load_encrypted_json() {
let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
let ((pats, errs), was_enc) =
load_secrets_auto(&encrypted, Some("pw"), Some(SecretsFormat::Json), false).unwrap();
assert!(was_enc);
assert_eq!(pats.len(), 2);
assert!(errs.is_empty());
}
#[test]
fn auto_load_force_plaintext() {
let data = sample_json().as_bytes();
let ((pats, _), was_enc) =
load_secrets_auto(data, None, Some(SecretsFormat::Json), true).unwrap();
assert!(!was_enc);
assert_eq!(pats.len(), 2);
}
// Encrypted input without a password must be an error.
#[test]
fn auto_load_encrypted_no_password_fails() {
let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
let result = load_secrets_auto(&encrypted, None, None, false);
assert!(result.is_err());
}
}