1use crate::category::Category;
69use crate::error::{Result, SanitizeError};
70use crate::scanner::ScanPattern;
71
/// Outcome of compiling a list of secret entries into scan patterns.
///
/// The first element holds every pattern that compiled successfully; the
/// second pairs the index of each failing entry with the error it produced.
pub type PatternCompileResult = (Vec<ScanPattern>, Vec<(usize, SanitizeError)>);
75
76use aes_gcm::aead::{Aead, KeyInit};
77use aes_gcm::{Aes256Gcm, Nonce};
78use hmac::Hmac;
79use rand::RngCore;
80use serde::{Deserialize, Serialize};
81use sha2::Sha256;
82use zeroize::{Zeroize, Zeroizing};
83
/// Length in bytes of the random salt stored at the start of an encrypted blob.
const SALT_LEN: usize = 32;

/// Length in bytes of the AES-GCM nonce stored directly after the salt.
const NONCE_LEN: usize = 12;

/// Number of PBKDF2 rounds used when deriving the encryption key from a password.
const PBKDF2_ITERATIONS: u32 = 600_000;

/// Length in bytes of the AES-GCM authentication tag that accompanies the ciphertext.
const GCM_TAG_LEN: usize = 16;

/// Smallest blob that can possibly be valid: salt, nonce and the tag of an
/// empty plaintext. Anything shorter is rejected before any key derivation.
const MIN_ENCRYPTED_LEN: usize = SALT_LEN + NONCE_LEN + GCM_TAG_LEN;
99
/// One secret definition as it is read from (or written to) a secrets file.
///
/// NOTE(review): `Debug` is derived, so formatting an entry with `{:?}`
/// prints the raw `pattern` — confirm entries are never logged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretEntry {
    /// The text to look for; interpreted according to `kind`.
    pub pattern: String,

    /// How `pattern` is interpreted. Falls back to `"literal"` when the
    /// field is missing from the file.
    #[serde(default = "default_kind")]
    pub kind: String,

    /// Category name for matches. Falls back to `"custom:secret"` when the
    /// field is missing from the file.
    #[serde(default = "default_category")]
    pub category: String,

    /// Optional human-readable label; `None` when the field is missing.
    #[serde(default)]
    pub label: Option<String>,
}
133
134impl Drop for SecretEntry {
135 fn drop(&mut self) {
136 self.pattern.zeroize();
137 self.kind.zeroize();
138 self.category.zeroize();
139 if let Some(ref mut l) = self.label {
140 l.zeroize();
141 }
142 }
143}
144
/// Serde default for `SecretEntry::kind`: entries without a kind are literals.
fn default_kind() -> String {
    String::from("literal")
}
148
/// Serde default for `SecretEntry::category` when the file omits the field.
fn default_category() -> String {
    String::from("custom:secret")
}
152
/// Serialization format of a secrets file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecretsFormat {
    /// JSON document.
    Json,
    /// YAML document.
    Yaml,
    /// TOML document.
    Toml,
}

impl SecretsFormat {
    /// Guesses the format from a file name.
    ///
    /// A trailing `.enc` is ignored, so `secrets.json.enc` is treated like
    /// `secrets.json`. The extension is compared ASCII-case-insensitively.
    /// Returns `None` when there is no extension or it is not one of
    /// `json`, `yaml`, `yml` or `toml`.
    pub fn from_extension(path: &str) -> Option<Self> {
        let base = path.strip_suffix(".enc").unwrap_or(path);
        let ext = std::path::Path::new(base).extension()?;

        let known = [
            ("json", Self::Json),
            ("yaml", Self::Yaml),
            ("yml", Self::Yaml),
            ("toml", Self::Toml),
        ];
        known
            .iter()
            .find(|&&(name, _)| ext.eq_ignore_ascii_case(name))
            .map(|&(_, format)| format)
    }

    /// Guesses the format from the first non-whitespace characters of `content`.
    ///
    /// * `{` is JSON.
    /// * `[` is JSON, unless what follows looks like a TOML table header:
    ///   `[name]` or `[[name]]` where the name starts with an ASCII letter
    ///   or `_`. A JSON list of entries continues with `{`, `[`, `]` or a
    ///   quote instead, so the two do not collide in practice.
    /// * `-` is YAML (this also covers a leading `---` document marker).
    /// * Anything else is assumed to be TOML.
    ///
    /// Invalid UTF-8 is tolerated; undecodable bytes are replaced before
    /// the check, so this function never fails.
    pub fn detect(content: &[u8]) -> Self {
        let text = String::from_utf8_lossy(content);
        let trimmed = text.trim_start();

        if trimmed.starts_with('{') {
            return Self::Json;
        }

        if let Some(after_bracket) = trimmed.strip_prefix('[') {
            // Step over the second bracket of an array-of-tables header.
            let inner = after_bracket
                .strip_prefix('[')
                .unwrap_or(after_bracket)
                .trim_start();
            let starts_like_key = inner
                .chars()
                .next()
                .is_some_and(|c| c.is_ascii_alphabetic() || c == '_');
            return if starts_like_key {
                Self::Toml
            } else {
                Self::Json
            };
        }

        if trimmed.starts_with('-') {
            Self::Yaml
        } else {
            Self::Toml
        }
    }
}
196
/// Deserialization wrapper for TOML secrets files: the entries are read
/// from the top-level `secrets` key rather than from a bare list.
#[derive(Deserialize)]
struct TomlSecrets {
    /// Entries found under the `secrets` key.
    secrets: Vec<SecretEntry>,
}
206
/// Borrowing counterpart of `TomlSecrets` used when writing TOML, so the
/// entries can be serialized under the `secrets` key without being cloned.
#[derive(Serialize)]
struct TomlSecretsRef<'a> {
    /// Entries to emit under the `secrets` key.
    secrets: &'a [SecretEntry],
}
212
213fn derive_key(password: &[u8], salt: &[u8]) -> Zeroizing<[u8; 32]> {
219 let mut key = Zeroizing::new([0u8; 32]);
220 pbkdf2::pbkdf2::<Hmac<Sha256>>(password, salt, PBKDF2_ITERATIONS, key.as_mut())
221 .expect("PBKDF2 output length is valid");
222 key
223}
224
225pub fn encrypt_secrets(plaintext: &[u8], password: &str) -> Result<Vec<u8>> {
249 if password.is_empty() {
250 return Err(SanitizeError::SecretsError(
251 "password must not be empty".into(),
252 ));
253 }
254
255 let mut rng = rand::thread_rng();
256
257 let mut salt = [0u8; SALT_LEN];
259 rng.fill_bytes(&mut salt);
260
261 let mut nonce_bytes = [0u8; NONCE_LEN];
262 rng.fill_bytes(&mut nonce_bytes);
263 let nonce = Nonce::from_slice(&nonce_bytes);
264
265 let key = derive_key(password.as_bytes(), &salt);
267 let cipher = Aes256Gcm::new_from_slice(key.as_ref())
268 .map_err(|e| SanitizeError::SecretsError(format!("cipher init: {}", e)))?;
269
270 let ciphertext = cipher
272 .encrypt(nonce, plaintext)
273 .map_err(|e| SanitizeError::SecretsError(format!("encryption failed: {}", e)))?;
274
275 let mut output = Vec::with_capacity(SALT_LEN + NONCE_LEN + ciphertext.len());
277 output.extend_from_slice(&salt);
278 output.extend_from_slice(&nonce_bytes);
279 output.extend_from_slice(&ciphertext);
280
281 Ok(output)
282}
283
284pub fn decrypt_secrets(encrypted: &[u8], password: &str) -> Result<Zeroizing<Vec<u8>>> {
302 if encrypted.len() < MIN_ENCRYPTED_LEN {
303 return Err(SanitizeError::SecretsError(
304 "encrypted file too short (corrupt or truncated)".into(),
305 ));
306 }
307
308 let salt = &encrypted[..SALT_LEN];
309 let nonce_bytes = &encrypted[SALT_LEN..SALT_LEN + NONCE_LEN];
310 let ciphertext = &encrypted[SALT_LEN + NONCE_LEN..];
311
312 let nonce = Nonce::from_slice(nonce_bytes);
313
314 let key = derive_key(password.as_bytes(), salt);
315 let cipher = Aes256Gcm::new_from_slice(key.as_ref())
316 .map_err(|e| SanitizeError::SecretsError(format!("cipher init: {}", e)))?;
317
318 let plaintext = cipher.decrypt(nonce, ciphertext).map_err(|_| {
319 SanitizeError::SecretsError("decryption failed: wrong password or corrupted file".into())
320 })?;
321
322 Ok(Zeroizing::new(plaintext))
323}
324
325pub fn parse_secrets(plaintext: &[u8], format: Option<SecretsFormat>) -> Result<Vec<SecretEntry>> {
339 let fmt = format.unwrap_or_else(|| SecretsFormat::detect(plaintext));
340 let text = std::str::from_utf8(plaintext)
341 .map_err(|e| SanitizeError::SecretsError(format!("invalid UTF-8: {}", e)))?;
342
343 match fmt {
344 SecretsFormat::Json => serde_json::from_str(text)
345 .map_err(|e| SanitizeError::SecretsError(format!("JSON parse: {}", e))),
346 SecretsFormat::Yaml => serde_yaml_ng::from_str(text)
347 .map_err(|e| SanitizeError::SecretsError(format!("YAML parse: {}", e))),
348 SecretsFormat::Toml => {
349 let wrapper: TomlSecrets = toml::from_str(text)
350 .map_err(|e| SanitizeError::SecretsError(format!("TOML parse: {}", e)))?;
351 Ok(wrapper.secrets)
352 }
353 }
354}
355
356pub fn serialize_secrets(entries: &[SecretEntry], format: SecretsFormat) -> Result<Vec<u8>> {
364 match format {
365 SecretsFormat::Json => serde_json::to_vec_pretty(entries)
366 .map_err(|e| SanitizeError::SecretsError(format!("JSON serialize: {}", e))),
367 SecretsFormat::Yaml => serde_yaml_ng::to_string(entries)
368 .map(|s| s.into_bytes())
369 .map_err(|e| SanitizeError::SecretsError(format!("YAML serialize: {}", e))),
370 SecretsFormat::Toml => {
371 let wrapper = TomlSecretsRef { secrets: entries };
372 toml::to_string_pretty(&wrapper)
373 .map(|s| s.into_bytes())
374 .map_err(|e| SanitizeError::SecretsError(format!("TOML serialize: {}", e)))
375 }
376 }
377}
378
379pub fn parse_category(s: &str) -> Category {
390 match s {
391 "email" => Category::Email,
392 "name" => Category::Name,
393 "phone" => Category::Phone,
394 "ipv4" => Category::IpV4,
395 "ipv6" => Category::IpV6,
396 "credit_card" => Category::CreditCard,
397 "ssn" => Category::Ssn,
398 "hostname" => Category::Hostname,
399 "mac_address" => Category::MacAddress,
400 "container_id" => Category::ContainerId,
401 "uuid" => Category::Uuid,
402 "jwt" => Category::Jwt,
403 "auth_token" => Category::AuthToken,
404 "file_path" => Category::FilePath,
405 "windows_sid" => Category::WindowsSid,
406 "url" => Category::Url,
407 "aws_arn" => Category::AwsArn,
408 "azure_resource_id" => Category::AzureResourceId,
409 other => {
410 let tag = other.strip_prefix("custom:").unwrap_or(other);
411 Category::Custom(tag.into())
412 }
413 }
414}
415
416pub fn entries_to_patterns(entries: &[SecretEntry]) -> PatternCompileResult {
425 let mut patterns = Vec::with_capacity(entries.len());
426 let mut errors = Vec::new();
427
428 for (i, entry) in entries.iter().enumerate() {
429 let category = parse_category(&entry.category);
430 let label = entry
431 .label
432 .clone()
433 .unwrap_or_else(|| truncate_label(&entry.pattern));
434
435 let result = match entry.kind.as_str() {
436 "regex" => ScanPattern::from_regex(&entry.pattern, category, label),
437 _ => ScanPattern::from_literal(&entry.pattern, category, label),
438 };
439
440 match result {
441 Ok(pat) => patterns.push(pat),
442 Err(e) => errors.push((i, e)),
443 }
444 }
445
446 (patterns, errors)
447}
448
/// Shortens `s` for use as a label.
///
/// Strings of at most 32 bytes are returned unchanged. Longer strings are
/// cut to at most 31 bytes and suffixed with `…`. The cut is moved back to
/// the nearest character boundary, so multi-byte UTF-8 input never causes a
/// slicing panic.
fn truncate_label(s: &str) -> String {
    const MAX_LABEL_BYTES: usize = 32;

    if s.len() <= MAX_LABEL_BYTES {
        return s.to_string();
    }

    // Index 0 is always a boundary, so this loop terminates.
    let mut cut = MAX_LABEL_BYTES - 1;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}…", &s[..cut])
}
457
458pub fn load_encrypted_secrets(
487 encrypted_bytes: &[u8],
488 password: &str,
489 format: Option<SecretsFormat>,
490) -> Result<PatternCompileResult> {
491 let plaintext = decrypt_secrets(encrypted_bytes, password)?;
492 let mut entries = parse_secrets(&plaintext, format)?;
493 let result = entries_to_patterns(&entries);
494 for entry in &mut entries {
496 entry.pattern.zeroize();
497 entry.kind.zeroize();
498 entry.category.zeroize();
499 if let Some(ref mut l) = entry.label {
500 l.zeroize();
501 }
502 }
503 drop(entries);
504 Ok(result)
505}
506
507pub fn load_plaintext_secrets(
529 plaintext: &[u8],
530 format: Option<SecretsFormat>,
531) -> Result<PatternCompileResult> {
532 let mut entries = parse_secrets(plaintext, format)?;
533 let result = entries_to_patterns(&entries);
534 for entry in &mut entries {
537 entry.pattern.zeroize();
538 entry.kind.zeroize();
539 entry.category.zeroize();
540 if let Some(ref mut l) = entry.label {
541 l.zeroize();
542 }
543 }
544 drop(entries);
545 Ok(result)
546}
547
548pub fn looks_encrypted(data: &[u8]) -> bool {
562 if data.len() < MIN_ENCRYPTED_LEN {
563 return false;
566 }
567 if let Ok(text) = std::str::from_utf8(data) {
570 let trimmed = text.trim_start();
571 let has_marker = trimmed.starts_with('[')
572 || trimmed.starts_with('{')
573 || trimmed.starts_with('-')
574 || trimmed.starts_with("---")
575 || trimmed.starts_with("[[")
576 || trimmed.starts_with('#');
577 if has_marker {
578 return false;
579 }
580 }
581 true
583}
584
585pub fn load_secrets_auto(
609 data: &[u8],
610 password: Option<&str>,
611 format: Option<SecretsFormat>,
612 force_plaintext: bool,
613) -> Result<(PatternCompileResult, bool)> {
614 if force_plaintext || !looks_encrypted(data) {
615 let result = load_plaintext_secrets(data, format)?;
616 Ok((result, false))
617 } else {
618 let pw = password.ok_or_else(|| {
619 SanitizeError::SecretsError(
620 "secrets file appears encrypted but no password was provided; \
621 use --unencrypted-secrets if the file is plaintext"
622 .into(),
623 )
624 })?;
625 let result = load_encrypted_secrets(data, pw, format)?;
626 Ok((result, true))
627 }
628}
629
// Unit tests for parsing, category mapping, pattern compilation, the
// encrypt/decrypt round trip, format detection and the auto-loading entry
// point.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Fixtures: the same two entries (one regex, one literal) in each format ----

    fn sample_json() -> &'static str {
        r#"[
            {
                "pattern": "alice@corp\\.com",
                "kind": "regex",
                "category": "email",
                "label": "alice_email"
            },
            {
                "pattern": "sk-proj-abc123secret",
                "kind": "literal",
                "category": "custom:api_key",
                "label": "openai_key"
            }
        ]"#
    }

    fn sample_yaml() -> &'static str {
        r#"- pattern: "alice@corp\\.com"
  kind: regex
  category: email
  label: alice_email
- pattern: sk-proj-abc123secret
  kind: literal
  category: "custom:api_key"
  label: openai_key
"#
    }

    fn sample_toml() -> &'static str {
        r#"[[secrets]]
pattern = "alice@corp\\.com"
kind = "regex"
category = "email"
label = "alice_email"

[[secrets]]
pattern = "sk-proj-abc123secret"
kind = "literal"
category = "custom:api_key"
label = "openai_key"
"#
    }

    // ---- Parsing ----

    #[test]
    fn parse_json_entries() {
        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].kind, "regex");
        assert_eq!(entries[0].category, "email");
        assert_eq!(entries[1].kind, "literal");
    }

    #[test]
    fn parse_yaml_entries() {
        let entries = parse_secrets(sample_yaml().as_bytes(), Some(SecretsFormat::Yaml)).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].label, Some("alice_email".into()));
    }

    #[test]
    fn parse_toml_entries() {
        let entries = parse_secrets(sample_toml().as_bytes(), Some(SecretsFormat::Toml)).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[1].pattern, "sk-proj-abc123secret");
    }

    // Passing `None` exercises content-based format detection.
    #[test]
    fn parse_auto_detect_json() {
        let entries = parse_secrets(sample_json().as_bytes(), None).unwrap();
        assert_eq!(entries.len(), 2);
    }

    #[test]
    fn parse_auto_detect_yaml() {
        let entries = parse_secrets(sample_yaml().as_bytes(), None).unwrap();
        assert_eq!(entries.len(), 2);
    }

    // ---- Category mapping ----

    #[test]
    fn parse_builtin_categories() {
        assert_eq!(parse_category("email"), Category::Email);
        assert_eq!(parse_category("ipv4"), Category::IpV4);
        assert_eq!(parse_category("ssn"), Category::Ssn);
    }

    // The `custom:` prefix is stripped from the stored tag.
    #[test]
    fn parse_custom_category() {
        match parse_category("custom:api_key") {
            Category::Custom(tag) => assert_eq!(tag.as_str(), "api_key"),
            other => panic!("expected Custom, got {:?}", other),
        }
    }

    #[test]
    fn parse_unknown_category_becomes_custom() {
        match parse_category("foobar") {
            Category::Custom(tag) => assert_eq!(tag.as_str(), "foobar"),
            other => panic!("expected Custom, got {:?}", other),
        }
    }

    // ---- Pattern compilation ----

    #[test]
    fn entries_to_patterns_success() {
        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
        let (patterns, errors) = entries_to_patterns(&entries);
        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
    }

    // A regex that fails to compile is reported with its entry index.
    #[test]
    fn entries_to_patterns_bad_regex() {
        let json = r#"[{"pattern": "[invalid(", "kind": "regex", "category": "email"}]"#;
        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
        let (patterns, errors) = entries_to_patterns(&entries);
        assert!(patterns.is_empty());
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].0, 0);
    }

    // ---- Encryption / decryption ----

    #[test]
    fn encrypt_decrypt_roundtrip() {
        let plaintext = sample_json().as_bytes();
        let password = "test-password-42";

        let encrypted = encrypt_secrets(plaintext, password).unwrap();

        // The blob carries a header in addition to the ciphertext.
        assert!(encrypted.len() > plaintext.len());

        let decrypted = decrypt_secrets(&encrypted, password).unwrap();
        assert_eq!(decrypted.as_slice(), plaintext);
    }

    #[test]
    fn decrypt_wrong_password_fails() {
        let plaintext = b"hello";
        let encrypted = encrypt_secrets(plaintext, "correct").unwrap();
        let result = decrypt_secrets(&encrypted, "wrong");
        assert!(result.is_err());
    }

    #[test]
    fn decrypt_truncated_blob_fails() {
        let result = decrypt_secrets(&[0u8; 10], "any");
        assert!(result.is_err());
    }

    #[test]
    fn decrypt_tampered_blob_fails() {
        let plaintext = b"hello world";
        let mut encrypted = encrypt_secrets(plaintext, "pw").unwrap();
        let last = encrypted.len() - 1;
        // Flip every bit of the final byte.
        encrypted[last] ^= 0xFF;
        let result = decrypt_secrets(&encrypted, "pw");
        assert!(result.is_err());
    }

    #[test]
    fn encrypt_empty_password_rejected() {
        let result = encrypt_secrets(b"hello", "");
        assert!(result.is_err());
    }

    // ---- Full pipeline: encrypt, then load through the encrypted loader ----

    #[test]
    fn full_pipeline_json() {
        let plaintext = sample_json().as_bytes();
        let password = "pipeline-test";

        let encrypted = encrypt_secrets(plaintext, password).unwrap();
        let (patterns, errors) =
            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Json)).unwrap();

        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
        assert_eq!(patterns[0].label(), "alice_email");
        assert_eq!(patterns[1].label(), "openai_key");
    }

    #[test]
    fn full_pipeline_yaml() {
        let plaintext = sample_yaml().as_bytes();
        let password = "yaml-test";

        let encrypted = encrypt_secrets(plaintext, password).unwrap();
        let (patterns, errors) =
            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Yaml)).unwrap();

        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
    }

    #[test]
    fn full_pipeline_toml() {
        let plaintext = sample_toml().as_bytes();
        let password = "toml-test";

        let encrypted = encrypt_secrets(plaintext, password).unwrap();
        let (patterns, errors) =
            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Toml)).unwrap();

        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
    }

    // ---- Plaintext loading ----

    #[test]
    fn load_plaintext_secrets_works() {
        let (patterns, errors) =
            load_plaintext_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
    }

    // ---- Serialization ----

    #[test]
    fn serialize_roundtrip_json() {
        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
        let serialized = serialize_secrets(&entries, SecretsFormat::Json).unwrap();
        let reparsed = parse_secrets(&serialized, Some(SecretsFormat::Json)).unwrap();
        assert_eq!(entries.len(), reparsed.len());
        assert_eq!(entries[0].pattern, reparsed[0].pattern);
    }

    // ---- Format from file extension ----

    #[test]
    fn format_from_extension() {
        assert_eq!(
            SecretsFormat::from_extension("secrets.json"),
            Some(SecretsFormat::Json)
        );
        assert_eq!(
            SecretsFormat::from_extension("secrets.json.enc"),
            Some(SecretsFormat::Json)
        );
        assert_eq!(
            SecretsFormat::from_extension("secrets.yaml"),
            Some(SecretsFormat::Yaml)
        );
        assert_eq!(
            SecretsFormat::from_extension("secrets.yml.enc"),
            Some(SecretsFormat::Yaml)
        );
        assert_eq!(
            SecretsFormat::from_extension("secrets.toml"),
            Some(SecretsFormat::Toml)
        );
        assert_eq!(SecretsFormat::from_extension("secrets.txt"), None);
    }

    // ---- Defaults for omitted fields ----

    #[test]
    fn default_kind_is_literal() {
        let json = r#"[{"pattern": "foo"}]"#;
        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
        assert_eq!(entries[0].kind, "literal");
    }

    #[test]
    fn default_category_is_custom_secret() {
        let json = r#"[{"pattern": "foo"}]"#;
        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
        assert_eq!(entries[0].category, "custom:secret");
    }

    // Without an explicit label, a short pattern is used as the label.
    #[test]
    fn default_label_from_pattern() {
        let json = r#"[{"pattern": "short"}]"#;
        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
        let (patterns, _) = entries_to_patterns(&entries);
        assert_eq!(patterns[0].label(), "short");
    }

    // ---- Encrypted-vs-plaintext heuristic ----

    #[test]
    fn looks_encrypted_json_plaintext() {
        assert!(!looks_encrypted(sample_json().as_bytes()));
    }

    #[test]
    fn looks_encrypted_yaml_plaintext() {
        assert!(!looks_encrypted(sample_yaml().as_bytes()));
    }

    #[test]
    fn looks_encrypted_toml_plaintext() {
        assert!(!looks_encrypted(sample_toml().as_bytes()));
    }

    #[test]
    fn looks_encrypted_actual_encrypted() {
        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
        assert!(looks_encrypted(&encrypted));
    }

    #[test]
    fn looks_encrypted_too_short() {
        assert!(!looks_encrypted(&[0u8; 10]));
    }

    // ---- Auto-loading entry point ----

    #[test]
    fn auto_load_plaintext_json() {
        let data = sample_json().as_bytes();
        let ((pats, errs), was_enc) =
            load_secrets_auto(data, None, Some(SecretsFormat::Json), false).unwrap();
        assert!(!was_enc);
        assert_eq!(pats.len(), 2);
        assert!(errs.is_empty());
    }

    #[test]
    fn auto_load_encrypted_json() {
        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
        let ((pats, errs), was_enc) =
            load_secrets_auto(&encrypted, Some("pw"), Some(SecretsFormat::Json), false).unwrap();
        assert!(was_enc);
        assert_eq!(pats.len(), 2);
        assert!(errs.is_empty());
    }

    #[test]
    fn auto_load_force_plaintext() {
        let data = sample_json().as_bytes();
        let ((pats, _), was_enc) =
            load_secrets_auto(data, None, Some(SecretsFormat::Json), true).unwrap();
        assert!(!was_enc);
        assert_eq!(pats.len(), 2);
    }

    // Encrypted data without a password is an error, not a silent fallback.
    #[test]
    fn auto_load_encrypted_no_password_fails() {
        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
        let result = load_secrets_auto(&encrypted, None, None, false);
        assert!(result.is_err());
    }
}